{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Use Soft Actor-Critic to Play LunarLander-v2\n",
    "\n",
    "PyTorch version"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "%matplotlib inline\n",
    "\n",
    "import sys\n",
    "import logging\n",
    "import itertools\n",
    "import copy\n",
    "\n",
    "import numpy as np\n",
    "np.random.seed(0)\n",
    "import pandas as pd\n",
    "import gym\n",
    "import matplotlib.pyplot as plt\n",
    "import torch\n",
    "torch.manual_seed(0)\n",
    "import torch.nn as nn\n",
    "import torch.optim as optim\n",
    "import torch.distributions as distributions\n",
    "\n",
    "# Send timestamped log records (DEBUG and above) to the notebook's stdout.\n",
    "logging.basicConfig(\n",
    "    stream=sys.stdout,\n",
    "    level=logging.DEBUG,\n",
    "    datefmt='%H:%M:%S',\n",
    "    format='%(asctime)s [%(levelname)s] %(message)s',\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "06:44:32 [INFO] env: <LunarLander<LunarLander-v2>>\n",
      "06:44:32 [INFO] action_space: Discrete(4)\n",
      "06:44:32 [INFO] observation_space: Box(-inf, inf, (8,), float32)\n",
      "06:44:32 [INFO] reward_range: (-inf, inf)\n",
      "06:44:32 [INFO] metadata: {'render.modes': ['human', 'rgb_array'], 'video.frames_per_second': 50}\n",
      "06:44:32 [INFO] _max_episode_steps: 1000\n",
      "06:44:32 [INFO] _elapsed_steps: None\n",
      "06:44:32 [INFO] id: LunarLander-v2\n",
      "06:44:32 [INFO] entry_point: gym.envs.box2d:LunarLander\n",
      "06:44:32 [INFO] reward_threshold: 200\n",
      "06:44:32 [INFO] nondeterministic: False\n",
      "06:44:32 [INFO] max_episode_steps: 1000\n",
      "06:44:32 [INFO] _kwargs: {}\n",
      "06:44:32 [INFO] _env_name: LunarLander\n"
     ]
    }
   ],
   "source": [
    "env = gym.make('LunarLander-v2')\n",
    "env.seed(0)\n",
    "\n",
    "# Log every attribute of the environment, then every attribute of its spec.\n",
    "for described in (env, env.spec):\n",
    "    for key, value in vars(described).items():\n",
    "        logging.info('%s: %s', key, value)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "class DQNReplayer:\n",
    "    \"\"\"Fixed-capacity ring buffer of transitions kept in a pandas DataFrame.\n",
    "\n",
    "    Each row holds (state, action, reward, next_state, done). Once `capacity`\n",
    "    rows have been written, new transitions overwrite the oldest ones.\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(self, capacity):\n",
    "        fields = ['state', 'action', 'reward', 'next_state', 'done']\n",
    "        self.capacity = capacity\n",
    "        self.count = 0  # number of rows filled so far, never above capacity\n",
    "        self.i = 0  # row index the next store() call writes to\n",
    "        self.memory = pd.DataFrame(index=range(capacity), columns=fields)\n",
    "\n",
    "    def store(self, *args):\n",
    "        \"\"\"Write one transition at the cursor, then advance it with wrap-around.\"\"\"\n",
    "        slot = self.i\n",
    "        self.memory.loc[slot] = args\n",
    "        self.i = (slot + 1) % self.capacity\n",
    "        self.count = min(self.capacity, self.count + 1)\n",
    "\n",
    "    def sample(self, size):\n",
    "        \"\"\"Pick `size` filled rows at random (np.random.choice defaults).\n",
    "\n",
    "        Returns a generator that yields one stacked array per column, in\n",
    "        column order, so it can be unpacked into five names.\n",
    "        \"\"\"\n",
    "        rows = np.random.choice(self.count, size=size)\n",
    "        columns = self.memory.columns\n",
    "        return (np.stack(self.memory.loc[rows, column]) for column in columns)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "class SACAgent:\n",
    "    \"\"\"Soft actor-critic agent for a discrete action space.\n",
    "\n",
    "    Holds a softmax actor, a state-value (V) critic with a slowly updated\n",
    "    target copy, and two action-value (Q) critics. In 'train' mode each\n",
    "    step() stores the newest transition and, once 500 transitions are\n",
    "    buffered, runs one learn() update.\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(self, env):\n",
    "        \"\"\"Create the networks, their optimizers and the replay buffer.\n",
    "\n",
    "        Args:\n",
    "            env: environment whose observation_space.shape[0] and\n",
    "                action_space.n size the networks.\n",
    "        \"\"\"\n",
    "        state_dim = env.observation_space.shape[0]\n",
    "        self.action_n = env.action_space.n\n",
    "        self.gamma = 0.99  # discount factor\n",
    "        self.replayer = DQNReplayer(10000)\n",
    "\n",
    "        self.alpha = 0.02  # weight of the log-probability (entropy) term\n",
    "\n",
    "        # step() reads these, so define them even if reset() was never called\n",
    "        self.mode = None\n",
    "        self.trajectory = []\n",
    "\n",
    "        # create actor\n",
    "        self.actor_net = self.build_net(input_size=state_dim,\n",
    "                hidden_sizes=[256, 256],\n",
    "                output_size=self.action_n, output_activator=nn.Softmax(-1))\n",
    "        self.actor_optimizer = optim.Adam(self.actor_net.parameters(), lr=3e-4)\n",
    "\n",
    "        # create V critic\n",
    "        self.v_evaluate_net = self.build_net(input_size=state_dim,\n",
    "                hidden_sizes=[256, 256])\n",
    "        self.v_target_net = copy.deepcopy(self.v_evaluate_net)\n",
    "        self.v_optimizer = optim.Adam(self.v_evaluate_net.parameters(), lr=3e-4)\n",
    "        self.v_loss = nn.MSELoss()\n",
    "\n",
    "        # create Q critic\n",
    "        self.q0_net = self.build_net(input_size=state_dim,\n",
    "                hidden_sizes=[256, 256], output_size=self.action_n)\n",
    "        self.q1_net = self.build_net(input_size=state_dim,\n",
    "                hidden_sizes=[256, 256], output_size=self.action_n)\n",
    "        self.q0_loss = nn.MSELoss()\n",
    "        self.q1_loss = nn.MSELoss()\n",
    "        self.q0_optimizer = optim.Adam(self.q0_net.parameters(), lr=3e-4)\n",
    "        self.q1_optimizer = optim.Adam(self.q1_net.parameters(), lr=3e-4)\n",
    "\n",
    "    def build_net(self, input_size, hidden_sizes, output_size=1,\n",
    "            output_activator=None):\n",
    "        \"\"\"Return a fully connected network with ReLU between linear layers.\n",
    "\n",
    "        Args:\n",
    "            input_size: width of the input.\n",
    "            hidden_sizes: sequence of hidden layer widths.\n",
    "            output_size: width of the output.\n",
    "            output_activator: optional module appended after the last linear\n",
    "                layer; nothing is appended when it is None.\n",
    "        \"\"\"\n",
    "        sizes = [input_size] + list(hidden_sizes) + [output_size]\n",
    "        layers = []\n",
    "        for in_features, out_features in zip(sizes[:-1], sizes[1:]):\n",
    "            layers.append(nn.Linear(in_features, out_features))\n",
    "            layers.append(nn.ReLU())\n",
    "        layers.pop()  # no ReLU after the output layer\n",
    "        if output_activator is not None:\n",
    "            layers.append(output_activator)\n",
    "        return nn.Sequential(*layers)\n",
    "\n",
    "    def reset(self, mode=None):\n",
    "        \"\"\"Begin an episode; mode='train' turns on storing and learning.\"\"\"\n",
    "        self.mode = mode\n",
    "        if self.mode == 'train':\n",
    "            self.trajectory = []\n",
    "\n",
    "    def step(self, observation, reward, done):\n",
    "        \"\"\"Sample an action for `observation` and, in train mode, learn.\n",
    "\n",
    "        Args:\n",
    "            observation: current observation.\n",
    "            reward: reward received for the previous action.\n",
    "            done: whether `observation` ends the episode.\n",
    "\n",
    "        Returns:\n",
    "            The action sampled from the actor for this observation.\n",
    "        \"\"\"\n",
    "        state_tensor = torch.as_tensor(observation, dtype=torch.float).unsqueeze(0)\n",
    "        # acting needs no gradients, so skip building the autograd graph\n",
    "        with torch.no_grad():\n",
    "            prob_tensor = self.actor_net(state_tensor)\n",
    "        action_tensor = distributions.Categorical(prob_tensor).sample()\n",
    "        action = action_tensor.numpy()[0]\n",
    "        if self.mode == 'train':\n",
    "            self.trajectory += [observation, reward, done, action]\n",
    "            if len(self.trajectory) >= 8:\n",
    "                # Unpack into names that do not clash with `action`: the value\n",
    "                # returned below must be the action sampled in this call, not\n",
    "                # the previous step's action.\n",
    "                (prev_state, _, _, prev_action,\n",
    "                        next_state, last_reward, last_done, _) = self.trajectory[-8:]\n",
    "                self.replayer.store(prev_state, prev_action, last_reward,\n",
    "                        next_state, last_done)\n",
    "                # only the newest four entries are needed by the next step\n",
    "                self.trajectory = self.trajectory[-4:]\n",
    "            if self.replayer.count >= 500:\n",
    "                self.learn()\n",
    "        return action\n",
    "\n",
    "    def close(self):\n",
    "        \"\"\"Nothing to release.\"\"\"\n",
    "        pass\n",
    "\n",
    "    def update_net(self, target_net, evaluate_net, learning_rate=0.0025):\n",
    "        \"\"\"Soft update: target <- lr * evaluate + (1 - lr) * target, in place.\"\"\"\n",
    "        with torch.no_grad():\n",
    "            for target_param, evaluate_param in zip(\n",
    "                    target_net.parameters(), evaluate_net.parameters()):\n",
    "                target_param.copy_(learning_rate * evaluate_param\n",
    "                        + (1 - learning_rate) * target_param)\n",
    "\n",
    "    def learn(self):\n",
    "        \"\"\"Run one update of both Q critics, the V critic and the actor.\n",
    "\n",
    "        Uses a batch of 128 transitions sampled from the replay buffer and\n",
    "        soft-updates the V target network after the V critic step.\n",
    "        \"\"\"\n",
    "        states, actions, rewards, next_states, dones = self.replayer.sample(128)\n",
    "        state_tensor = torch.as_tensor(states, dtype=torch.float)\n",
    "        action_tensor = torch.as_tensor(actions, dtype=torch.long).unsqueeze(1)\n",
    "        reward_tensor = torch.as_tensor(rewards, dtype=torch.float).unsqueeze(1)\n",
    "        next_state_tensor = torch.as_tensor(next_states, dtype=torch.float)\n",
    "        done_tensor = torch.as_tensor(dones, dtype=torch.float).unsqueeze(1)\n",
    "\n",
    "        # train Q critics; the target is a constant, so compute it without grad\n",
    "        with torch.no_grad():\n",
    "            next_v_tensor = self.v_target_net(next_state_tensor)\n",
    "            q_target_tensor = (reward_tensor\n",
    "                    + self.gamma * (1. - done_tensor) * next_v_tensor)\n",
    "\n",
    "        for q_net, q_loss, q_optimizer in (\n",
    "                (self.q0_net, self.q0_loss, self.q0_optimizer),\n",
    "                (self.q1_net, self.q1_loss, self.q1_optimizer)):\n",
    "            q_pred_tensor = torch.gather(q_net(state_tensor), 1, action_tensor)\n",
    "            q_loss_tensor = q_loss(q_pred_tensor, q_target_tensor)\n",
    "            q_optimizer.zero_grad()\n",
    "            q_loss_tensor.backward()\n",
    "            q_optimizer.step()\n",
    "\n",
    "        # train V critic\n",
    "        prob_tensor = self.actor_net(state_tensor)\n",
    "        ln_prob_tensor = torch.log(prob_tensor.clamp(1e-6, 1.))\n",
    "        with torch.no_grad():\n",
    "            # Q values act as constants for both the V target and the actor\n",
    "            # loss, so no gradient should flow back into the Q critics.\n",
    "            q0_tensor = self.q0_net(state_tensor)\n",
    "            q1_tensor = self.q1_net(state_tensor)\n",
    "            q01_tensor = torch.min(q0_tensor, q1_tensor)\n",
    "            entropic_q01_tensor = prob_tensor * (q01_tensor -\n",
    "                    self.alpha * ln_prob_tensor)\n",
    "            v_target_tensor = torch.sum(entropic_q01_tensor, dim=-1, keepdim=True)\n",
    "        v_pred_tensor = self.v_evaluate_net(state_tensor)\n",
    "        v_loss_tensor = self.v_loss(v_pred_tensor, v_target_tensor)\n",
    "        self.v_optimizer.zero_grad()\n",
    "        v_loss_tensor.backward()\n",
    "        self.v_optimizer.step()\n",
    "\n",
    "        self.update_net(self.v_target_net, self.v_evaluate_net)\n",
    "\n",
    "        # train actor\n",
    "        prob_q_tensor = prob_tensor * (self.alpha * ln_prob_tensor - q0_tensor)\n",
    "        actor_loss_tensor = prob_q_tensor.sum(dim=-1).mean()\n",
    "        self.actor_optimizer.zero_grad()\n",
    "        actor_loss_tensor.backward()\n",
    "        self.actor_optimizer.step()\n",
    "\n",
    "# Build the agent. Its networks are initialised here, so re-running this\n",
    "# cell discards any training progress.\n",
    "agent = SACAgent(env)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "06:44:33 [INFO] ==== train ====\n",
      "06:44:33 [DEBUG] train episode 0: reward = -119.74, steps = 100\n",
      "06:44:33 [DEBUG] train episode 1: reward = -53.41, steps = 63\n",
      "06:44:33 [DEBUG] train episode 2: reward = -136.45, steps = 87\n",
      "06:44:33 [DEBUG] train episode 3: reward = -326.87, steps = 70\n",
      "06:44:33 [DEBUG] train episode 4: reward = -117.95, steps = 113\n",
      "06:44:34 [DEBUG] train episode 5: reward = -431.55, steps = 99\n",
      "06:44:37 [DEBUG] train episode 6: reward = -344.16, steps = 79\n",
      "06:44:40 [DEBUG] train episode 7: reward = -124.00, steps = 73\n",
      "06:44:43 [DEBUG] train episode 8: reward = -504.88, steps = 85\n",
      "06:44:46 [DEBUG] train episode 9: reward = -453.93, steps = 79\n",
      "06:44:48 [DEBUG] train episode 10: reward = -347.43, steps = 71\n",
      "06:44:51 [DEBUG] train episode 11: reward = -244.51, steps = 73\n",
      "06:44:54 [DEBUG] train episode 12: reward = -291.91, steps = 73\n",
      "06:44:56 [DEBUG] train episode 13: reward = -398.81, steps = 73\n",
      "06:44:59 [DEBUG] train episode 14: reward = -281.87, steps = 68\n",
      "06:45:01 [DEBUG] train episode 15: reward = -262.03, steps = 53\n",
      "06:45:03 [DEBUG] train episode 16: reward = -213.43, steps = 61\n",
      "06:45:05 [DEBUG] train episode 17: reward = -308.40, steps = 56\n",
      "06:45:08 [DEBUG] train episode 18: reward = -242.01, steps = 76\n",
      "06:45:13 [DEBUG] train episode 19: reward = -353.46, steps = 113\n",
      "06:45:15 [DEBUG] train episode 20: reward = -209.88, steps = 75\n",
      "06:45:19 [DEBUG] train episode 21: reward = -308.92, steps = 98\n",
      "06:45:22 [DEBUG] train episode 22: reward = 9.74, steps = 87\n",
      "06:45:37 [DEBUG] train episode 23: reward = -520.48, steps = 412\n",
      "06:45:46 [DEBUG] train episode 24: reward = -341.28, steps = 229\n",
      "06:45:51 [DEBUG] train episode 25: reward = -59.83, steps = 153\n",
      "06:45:55 [DEBUG] train episode 26: reward = -122.98, steps = 110\n",
      "06:46:20 [DEBUG] train episode 27: reward = -646.65, steps = 668\n",
      "06:46:55 [DEBUG] train episode 28: reward = -480.28, steps = 920\n",
      "06:47:08 [DEBUG] train episode 29: reward = -272.43, steps = 357\n",
      "06:47:15 [DEBUG] train episode 30: reward = -305.03, steps = 173\n",
      "06:47:28 [DEBUG] train episode 31: reward = -199.98, steps = 368\n",
      "06:47:40 [DEBUG] train episode 32: reward = -124.04, steps = 320\n",
      "06:47:52 [DEBUG] train episode 33: reward = -118.74, steps = 317\n",
      "06:48:04 [DEBUG] train episode 34: reward = -215.16, steps = 338\n",
      "06:48:17 [DEBUG] train episode 35: reward = -232.50, steps = 356\n",
      "06:48:43 [DEBUG] train episode 36: reward = -297.74, steps = 693\n",
      "06:49:02 [DEBUG] train episode 37: reward = -262.43, steps = 496\n",
      "06:49:15 [DEBUG] train episode 38: reward = -269.07, steps = 345\n",
      "06:49:22 [DEBUG] train episode 39: reward = -190.48, steps = 191\n",
      "06:49:31 [DEBUG] train episode 40: reward = -206.82, steps = 237\n",
      "06:49:42 [DEBUG] train episode 41: reward = -196.88, steps = 315\n",
      "06:49:50 [DEBUG] train episode 42: reward = -147.69, steps = 200\n",
      "06:49:58 [DEBUG] train episode 43: reward = -210.92, steps = 239\n",
      "06:50:06 [DEBUG] train episode 44: reward = -169.53, steps = 197\n",
      "06:50:37 [DEBUG] train episode 45: reward = -320.28, steps = 840\n",
      "06:50:48 [DEBUG] train episode 46: reward = -248.61, steps = 299\n",
      "06:50:58 [DEBUG] train episode 47: reward = -239.37, steps = 265\n",
      "06:51:04 [DEBUG] train episode 48: reward = -176.54, steps = 167\n",
      "06:51:36 [DEBUG] train episode 49: reward = -311.47, steps = 820\n",
      "06:51:45 [DEBUG] train episode 50: reward = -147.75, steps = 261\n",
      "06:52:03 [DEBUG] train episode 51: reward = -241.43, steps = 471\n",
      "06:52:10 [DEBUG] train episode 52: reward = -152.05, steps = 211\n",
      "06:52:27 [DEBUG] train episode 53: reward = -226.51, steps = 439\n",
      "06:52:34 [DEBUG] train episode 54: reward = -150.29, steps = 188\n",
      "06:52:44 [DEBUG] train episode 55: reward = -191.25, steps = 277\n",
      "06:52:54 [DEBUG] train episode 56: reward = -156.33, steps = 275\n",
      "06:53:08 [DEBUG] train episode 57: reward = -204.69, steps = 385\n",
      "06:53:19 [DEBUG] train episode 58: reward = -180.31, steps = 297\n",
      "06:53:40 [DEBUG] train episode 59: reward = -190.51, steps = 571\n",
      "06:53:54 [DEBUG] train episode 60: reward = -179.89, steps = 364\n",
      "06:54:16 [DEBUG] train episode 61: reward = -224.54, steps = 595\n",
      "06:54:41 [DEBUG] train episode 62: reward = -255.25, steps = 645\n",
      "06:55:18 [DEBUG] train episode 63: reward = -282.85, steps = 984\n",
      "06:55:36 [DEBUG] train episode 64: reward = -171.43, steps = 464\n",
      "06:55:51 [DEBUG] train episode 65: reward = -202.57, steps = 399\n",
      "06:56:26 [DEBUG] train episode 66: reward = -262.52, steps = 910\n",
      "06:56:36 [DEBUG] train episode 67: reward = -103.61, steps = 277\n",
      "06:56:45 [DEBUG] train episode 68: reward = -117.21, steps = 246\n",
      "06:57:23 [DEBUG] train episode 69: reward = -157.12, steps = 1000\n",
      "06:57:53 [DEBUG] train episode 70: reward = -251.57, steps = 792\n",
      "06:58:30 [DEBUG] train episode 71: reward = -176.52, steps = 1000\n",
      "06:58:43 [DEBUG] train episode 72: reward = -154.51, steps = 316\n",
      "06:59:21 [DEBUG] train episode 73: reward = -125.18, steps = 1000\n",
      "06:59:59 [DEBUG] train episode 74: reward = -91.01, steps = 1000\n",
      "07:00:39 [DEBUG] train episode 75: reward = -152.42, steps = 1000\n",
      "07:01:17 [DEBUG] train episode 76: reward = -136.96, steps = 1000\n",
      "07:01:57 [DEBUG] train episode 77: reward = -155.50, steps = 1000\n",
      "07:02:35 [DEBUG] train episode 78: reward = -92.97, steps = 1000\n",
      "07:03:13 [DEBUG] train episode 79: reward = -69.89, steps = 1000\n",
      "07:03:49 [DEBUG] train episode 80: reward = -103.41, steps = 1000\n",
      "07:04:26 [DEBUG] train episode 81: reward = -79.75, steps = 1000\n",
      "07:05:03 [DEBUG] train episode 82: reward = -105.82, steps = 1000\n",
      "07:05:42 [DEBUG] train episode 83: reward = -136.08, steps = 1000\n",
      "07:06:23 [DEBUG] train episode 84: reward = -119.86, steps = 1000\n",
      "07:07:04 [DEBUG] train episode 85: reward = -76.90, steps = 1000\n",
      "07:07:45 [DEBUG] train episode 86: reward = -97.73, steps = 1000\n",
      "07:08:25 [DEBUG] train episode 87: reward = -146.23, steps = 1000\n",
      "07:09:04 [DEBUG] train episode 88: reward = -120.31, steps = 1000\n",
      "07:09:45 [DEBUG] train episode 89: reward = -113.89, steps = 1000\n",
      "07:10:26 [DEBUG] train episode 90: reward = -106.02, steps = 1000\n",
      "07:11:07 [DEBUG] train episode 91: reward = -19.21, steps = 1000\n",
      "07:11:49 [DEBUG] train episode 92: reward = -116.32, steps = 1000\n",
      "07:12:29 [DEBUG] train episode 93: reward = -57.89, steps = 1000\n",
      "07:13:10 [DEBUG] train episode 94: reward = -92.17, steps = 1000\n",
      "07:13:52 [DEBUG] train episode 95: reward = -122.81, steps = 1000\n",
      "07:14:33 [DEBUG] train episode 96: reward = -144.50, steps = 1000\n",
      "07:15:14 [DEBUG] train episode 97: reward = -87.08, steps = 1000\n",
      "07:15:56 [DEBUG] train episode 98: reward = -125.38, steps = 1000\n",
      "07:16:37 [DEBUG] train episode 99: reward = -64.51, steps = 1000\n",
      "07:17:18 [DEBUG] train episode 100: reward = -111.63, steps = 1000\n",
      "07:17:59 [DEBUG] train episode 101: reward = -52.33, steps = 1000\n",
      "07:18:43 [DEBUG] train episode 102: reward = -126.03, steps = 1000\n",
      "07:19:24 [DEBUG] train episode 103: reward = -125.11, steps = 1000\n",
      "07:19:27 [DEBUG] train episode 104: reward = -205.00, steps = 76\n",
      "07:20:09 [DEBUG] train episode 105: reward = -146.56, steps = 1000\n",
      "07:20:52 [DEBUG] train episode 106: reward = -139.96, steps = 1000\n",
      "07:21:32 [DEBUG] train episode 107: reward = -124.79, steps = 1000\n",
      "07:22:12 [DEBUG] train episode 108: reward = -114.53, steps = 1000\n",
      "07:22:52 [DEBUG] train episode 109: reward = -95.93, steps = 1000\n",
      "07:23:35 [DEBUG] train episode 110: reward = -123.12, steps = 1000\n",
      "07:24:18 [DEBUG] train episode 111: reward = -80.41, steps = 1000\n",
      "07:25:00 [DEBUG] train episode 112: reward = -76.70, steps = 1000\n",
      "07:25:44 [DEBUG] train episode 113: reward = -105.85, steps = 1000\n",
      "07:26:29 [DEBUG] train episode 114: reward = -107.08, steps = 1000\n",
      "07:27:12 [DEBUG] train episode 115: reward = -129.47, steps = 1000\n",
      "07:27:57 [DEBUG] train episode 116: reward = -48.79, steps = 1000\n",
      "07:28:43 [DEBUG] train episode 117: reward = -50.67, steps = 1000\n",
      "07:29:29 [DEBUG] train episode 118: reward = -99.13, steps = 1000\n",
      "07:30:15 [DEBUG] train episode 119: reward = -95.30, steps = 1000\n",
      "07:31:00 [DEBUG] train episode 120: reward = -131.54, steps = 1000\n",
      "07:31:22 [DEBUG] train episode 121: reward = 234.60, steps = 489\n",
      "07:32:08 [DEBUG] train episode 122: reward = -134.29, steps = 1000\n",
      "07:32:53 [DEBUG] train episode 123: reward = -63.59, steps = 1000\n",
      "07:33:26 [DEBUG] train episode 124: reward = -80.91, steps = 784\n",
      "07:34:11 [DEBUG] train episode 125: reward = -115.63, steps = 1000\n",
      "07:34:54 [DEBUG] train episode 126: reward = -82.44, steps = 1000\n",
      "07:35:40 [DEBUG] train episode 127: reward = -128.58, steps = 1000\n",
      "07:36:24 [DEBUG] train episode 128: reward = -67.69, steps = 1000\n",
      "07:37:09 [DEBUG] train episode 129: reward = -136.20, steps = 1000\n",
      "07:37:54 [DEBUG] train episode 130: reward = -103.51, steps = 1000\n",
      "07:38:40 [DEBUG] train episode 131: reward = -146.59, steps = 1000\n",
      "07:39:25 [DEBUG] train episode 132: reward = -69.08, steps = 1000\n",
      "07:40:10 [DEBUG] train episode 133: reward = -66.15, steps = 1000\n",
      "07:40:56 [DEBUG] train episode 134: reward = -125.93, steps = 1000\n",
      "07:41:43 [DEBUG] train episode 135: reward = -100.82, steps = 1000\n",
      "07:42:31 [DEBUG] train episode 136: reward = -136.78, steps = 1000\n",
      "07:42:40 [DEBUG] train episode 137: reward = 1.99, steps = 202\n",
      "07:42:44 [DEBUG] train episode 138: reward = -411.09, steps = 92\n",
      "07:43:29 [DEBUG] train episode 139: reward = -84.54, steps = 1000\n",
      "07:44:13 [DEBUG] train episode 140: reward = -95.32, steps = 1000\n",
      "07:45:00 [DEBUG] train episode 141: reward = -64.36, steps = 1000\n",
      "07:45:46 [DEBUG] train episode 142: reward = -46.74, steps = 1000\n",
      "07:46:32 [DEBUG] train episode 143: reward = -111.39, steps = 1000\n",
      "07:47:18 [DEBUG] train episode 144: reward = -133.64, steps = 1000\n",
      "07:48:05 [DEBUG] train episode 145: reward = -173.13, steps = 1000\n",
      "07:48:59 [DEBUG] train episode 146: reward = -139.49, steps = 1000\n",
      "07:49:54 [DEBUG] train episode 147: reward = -110.06, steps = 1000\n",
      "07:50:42 [DEBUG] train episode 148: reward = -120.51, steps = 1000\n",
      "07:51:03 [DEBUG] train episode 149: reward = -194.49, steps = 449\n",
      "07:51:50 [DEBUG] train episode 150: reward = -91.71, steps = 1000\n",
      "07:52:38 [DEBUG] train episode 151: reward = -109.07, steps = 1000\n",
      "07:53:25 [DEBUG] train episode 152: reward = -120.29, steps = 1000\n",
      "07:54:12 [DEBUG] train episode 153: reward = -77.45, steps = 1000\n",
      "07:55:00 [DEBUG] train episode 154: reward = -106.42, steps = 1000\n",
      "07:55:49 [DEBUG] train episode 155: reward = -135.87, steps = 1000\n",
      "07:56:37 [DEBUG] train episode 156: reward = -87.26, steps = 1000\n",
      "07:57:26 [DEBUG] train episode 157: reward = -37.32, steps = 1000\n",
      "07:58:12 [DEBUG] train episode 158: reward = -108.85, steps = 1000\n",
      "07:59:00 [DEBUG] train episode 159: reward = -92.93, steps = 1000\n",
      "07:59:48 [DEBUG] train episode 160: reward = -51.78, steps = 1000\n",
      "08:00:36 [DEBUG] train episode 161: reward = -94.08, steps = 1000\n",
      "08:01:29 [DEBUG] train episode 162: reward = -65.62, steps = 1000\n",
      "08:02:24 [DEBUG] train episode 163: reward = -96.11, steps = 1000\n",
      "08:03:19 [DEBUG] train episode 164: reward = -72.80, steps = 1000\n",
      "08:04:07 [DEBUG] train episode 165: reward = -90.31, steps = 1000\n",
      "08:04:58 [DEBUG] train episode 166: reward = -103.97, steps = 1000\n",
      "08:05:46 [DEBUG] train episode 167: reward = -110.74, steps = 1000\n",
      "08:06:34 [DEBUG] train episode 168: reward = -87.47, steps = 1000\n",
      "08:07:22 [DEBUG] train episode 169: reward = -110.01, steps = 1000\n",
      "08:08:10 [DEBUG] train episode 170: reward = -72.90, steps = 1000\n",
      "08:08:57 [DEBUG] train episode 171: reward = -102.21, steps = 1000\n",
      "08:09:43 [DEBUG] train episode 172: reward = -78.22, steps = 1000\n",
      "08:10:30 [DEBUG] train episode 173: reward = -134.52, steps = 1000\n",
      "08:11:17 [DEBUG] train episode 174: reward = -112.33, steps = 1000\n",
      "08:12:06 [DEBUG] train episode 175: reward = -124.90, steps = 1000\n",
      "08:12:54 [DEBUG] train episode 176: reward = -104.53, steps = 1000\n",
      "08:13:41 [DEBUG] train episode 177: reward = -104.41, steps = 1000\n",
      "08:13:46 [DEBUG] train episode 178: reward = -120.70, steps = 117\n",
      "08:14:34 [DEBUG] train episode 179: reward = -116.06, steps = 1000\n",
      "08:15:22 [DEBUG] train episode 180: reward = -109.05, steps = 1000\n",
      "08:16:13 [DEBUG] train episode 181: reward = -138.38, steps = 1000\n",
      "08:16:57 [DEBUG] train episode 182: reward = -180.98, steps = 872\n",
      "08:17:43 [DEBUG] train episode 183: reward = -121.58, steps = 1000\n",
      "08:18:31 [DEBUG] train episode 184: reward = -92.83, steps = 1000\n",
      "08:19:18 [DEBUG] train episode 185: reward = -114.65, steps = 1000\n",
      "09:45:10 [DEBUG] train episode 186: reward = -91.63, steps = 1000\n",
      "09:46:00 [DEBUG] train episode 187: reward = -51.81, steps = 1000\n",
      "09:46:51 [DEBUG] train episode 188: reward = -111.85, steps = 1000\n",
      "09:47:41 [DEBUG] train episode 189: reward = -86.58, steps = 1000\n",
      "09:48:33 [DEBUG] train episode 190: reward = -103.56, steps = 1000\n",
      "09:49:23 [DEBUG] train episode 191: reward = -59.68, steps = 1000\n",
      "09:50:17 [DEBUG] train episode 192: reward = -103.80, steps = 1000\n",
      "09:51:12 [DEBUG] train episode 193: reward = -87.74, steps = 1000\n",
      "09:52:01 [DEBUG] train episode 194: reward = -102.89, steps = 1000\n",
      "09:52:50 [DEBUG] train episode 195: reward = -71.90, steps = 1000\n",
      "09:53:40 [DEBUG] train episode 196: reward = -100.19, steps = 1000\n",
      "09:54:33 [DEBUG] train episode 197: reward = -111.32, steps = 1000\n",
      "09:55:27 [DEBUG] train episode 198: reward = -130.02, steps = 1000\n",
      "09:56:19 [DEBUG] train episode 199: reward = -102.46, steps = 1000\n",
      "09:57:10 [DEBUG] train episode 200: reward = -94.77, steps = 1000\n",
      "09:58:00 [DEBUG] train episode 201: reward = -95.71, steps = 1000\n",
      "09:58:50 [DEBUG] train episode 202: reward = -130.00, steps = 1000\n",
      "09:59:42 [DEBUG] train episode 203: reward = -90.24, steps = 1000\n",
      "10:00:35 [DEBUG] train episode 204: reward = -56.69, steps = 1000\n",
      "10:01:24 [DEBUG] train episode 205: reward = -127.45, steps = 1000\n",
      "10:02:13 [DEBUG] train episode 206: reward = -159.15, steps = 1000\n",
      "10:03:05 [DEBUG] train episode 207: reward = -89.27, steps = 1000\n",
      "10:03:57 [DEBUG] train episode 208: reward = -143.35, steps = 1000\n",
      "10:04:02 [DEBUG] train episode 209: reward = -152.49, steps = 107\n",
      "10:04:15 [DEBUG] train episode 210: reward = -218.52, steps = 281\n",
      "10:04:23 [DEBUG] train episode 211: reward = -132.44, steps = 148\n",
      "10:05:13 [DEBUG] train episode 212: reward = -112.33, steps = 1000\n",
      "10:05:21 [DEBUG] train episode 213: reward = -190.35, steps = 173\n",
      "10:05:34 [DEBUG] train episode 214: reward = -192.30, steps = 250\n",
      "10:05:43 [DEBUG] train episode 215: reward = -372.53, steps = 185\n",
      "10:05:51 [DEBUG] train episode 216: reward = -448.11, steps = 173\n",
      "10:05:59 [DEBUG] train episode 217: reward = -514.03, steps = 155\n",
      "10:06:04 [DEBUG] train episode 218: reward = -331.05, steps = 102\n",
      "10:06:13 [DEBUG] train episode 219: reward = -355.23, steps = 180\n",
      "10:06:21 [DEBUG] train episode 220: reward = -315.29, steps = 164\n",
      "10:06:27 [DEBUG] train episode 221: reward = -334.99, steps = 110\n",
      "10:06:32 [DEBUG] train episode 222: reward = -338.17, steps = 95\n",
      "10:06:39 [DEBUG] train episode 223: reward = -239.06, steps = 134\n",
      "10:07:33 [DEBUG] train episode 224: reward = -235.71, steps = 1000\n",
      "10:07:47 [DEBUG] train episode 225: reward = -221.32, steps = 285\n",
      "10:07:55 [DEBUG] train episode 226: reward = -21.88, steps = 152\n",
      "10:08:01 [DEBUG] train episode 227: reward = -191.53, steps = 137\n",
      "10:08:08 [DEBUG] train episode 228: reward = -287.15, steps = 146\n",
      "10:08:12 [DEBUG] train episode 229: reward = -140.63, steps = 77\n",
      "10:08:16 [DEBUG] train episode 230: reward = -123.00, steps = 69\n",
      "10:08:22 [DEBUG] train episode 231: reward = -153.94, steps = 135\n",
      "10:08:35 [DEBUG] train episode 232: reward = -123.91, steps = 257\n",
      "10:08:38 [DEBUG] train episode 233: reward = -85.80, steps = 65\n",
      "10:08:43 [DEBUG] train episode 234: reward = -52.43, steps = 95\n",
      "10:08:51 [DEBUG] train episode 235: reward = -237.58, steps = 111\n",
      "10:09:23 [DEBUG] train episode 236: reward = -77.69, steps = 620\n",
      "10:09:26 [DEBUG] train episode 237: reward = -143.91, steps = 59\n",
      "10:09:52 [DEBUG] train episode 238: reward = -178.96, steps = 569\n",
      "10:09:54 [DEBUG] train episode 239: reward = -57.07, steps = 65\n",
      "10:10:21 [DEBUG] train episode 240: reward = 226.20, steps = 582\n",
      "10:10:26 [DEBUG] train episode 241: reward = -263.10, steps = 109\n",
      "10:10:44 [DEBUG] train episode 242: reward = -118.00, steps = 407\n",
      "10:10:49 [DEBUG] train episode 243: reward = -145.33, steps = 111\n",
      "10:11:19 [DEBUG] train episode 244: reward = -276.73, steps = 660\n",
      "10:11:22 [DEBUG] train episode 245: reward = -78.30, steps = 75\n",
      "10:11:27 [DEBUG] train episode 246: reward = -175.75, steps = 113\n",
      "10:11:46 [DEBUG] train episode 247: reward = -202.39, steps = 429\n",
      "10:11:56 [DEBUG] train episode 248: reward = -87.05, steps = 212\n",
      "10:11:59 [DEBUG] train episode 249: reward = -105.88, steps = 76\n",
      "10:12:03 [DEBUG] train episode 250: reward = 24.85, steps = 99\n",
      "10:12:06 [DEBUG] train episode 251: reward = -31.90, steps = 69\n",
      "10:12:10 [DEBUG] train episode 252: reward = -98.10, steps = 88\n",
      "10:12:13 [DEBUG] train episode 253: reward = -65.26, steps = 68\n",
      "10:12:23 [DEBUG] train episode 254: reward = -47.99, steps = 214\n",
      "10:12:26 [DEBUG] train episode 255: reward = -66.61, steps = 78\n",
      "10:12:30 [DEBUG] train episode 256: reward = -55.35, steps = 96\n",
      "10:12:43 [DEBUG] train episode 257: reward = -108.98, steps = 287\n",
      "10:12:53 [DEBUG] train episode 258: reward = -243.54, steps = 228\n",
      "10:12:58 [DEBUG] train episode 259: reward = -61.50, steps = 113\n",
      "10:13:04 [DEBUG] train episode 260: reward = 6.30, steps = 146\n",
      "10:13:07 [DEBUG] train episode 261: reward = -63.49, steps = 69\n",
      "10:13:12 [DEBUG] train episode 262: reward = -27.91, steps = 110\n",
      "10:13:16 [DEBUG] train episode 263: reward = -28.44, steps = 86\n",
      "10:13:21 [DEBUG] train episode 264: reward = -73.21, steps = 125\n",
      "10:13:33 [DEBUG] train episode 265: reward = -90.64, steps = 267\n",
      "10:13:37 [DEBUG] train episode 266: reward = -71.93, steps = 106\n",
      "10:13:41 [DEBUG] train episode 267: reward = -80.66, steps = 83\n",
      "10:13:46 [DEBUG] train episode 268: reward = -254.02, steps = 119\n",
      "10:13:51 [DEBUG] train episode 269: reward = -175.93, steps = 106\n",
      "10:13:55 [DEBUG] train episode 270: reward = -31.64, steps = 101\n",
      "10:14:00 [DEBUG] train episode 271: reward = 39.31, steps = 122\n",
      "10:14:04 [DEBUG] train episode 272: reward = -96.64, steps = 87\n",
      "10:14:08 [DEBUG] train episode 273: reward = -126.36, steps = 98\n",
      "10:14:11 [DEBUG] train episode 274: reward = -93.53, steps = 58\n",
      "10:14:16 [DEBUG] train episode 275: reward = -29.92, steps = 123\n",
      "10:14:21 [DEBUG] train episode 276: reward = -195.91, steps = 105\n",
      "10:14:25 [DEBUG] train episode 277: reward = -59.00, steps = 94\n",
      "10:14:28 [DEBUG] train episode 278: reward = -69.29, steps = 60\n",
      "10:14:33 [DEBUG] train episode 279: reward = -79.33, steps = 115\n",
      "10:14:39 [DEBUG] train episode 280: reward = -46.21, steps = 131\n",
      "10:14:42 [DEBUG] train episode 281: reward = -41.76, steps = 78\n",
      "10:14:49 [DEBUG] train episode 282: reward = -81.62, steps = 167\n",
      "10:14:56 [DEBUG] train episode 283: reward = -61.45, steps = 136\n",
      "10:15:00 [DEBUG] train episode 284: reward = -83.56, steps = 114\n",
      "10:15:08 [DEBUG] train episode 285: reward = -45.95, steps = 182\n",
      "10:15:13 [DEBUG] train episode 286: reward = -231.50, steps = 100\n",
      "10:15:19 [DEBUG] train episode 287: reward = 2.56, steps = 142\n",
      "10:15:23 [DEBUG] train episode 288: reward = -164.72, steps = 91\n",
      "10:15:28 [DEBUG] train episode 289: reward = -269.13, steps = 110\n",
      "10:15:33 [DEBUG] train episode 290: reward = -187.57, steps = 92\n",
      "10:15:37 [DEBUG] train episode 291: reward = -90.53, steps = 90\n",
      "10:15:44 [DEBUG] train episode 292: reward = -29.03, steps = 156\n",
      "10:15:50 [DEBUG] train episode 293: reward = -79.58, steps = 148\n",
      "10:15:56 [DEBUG] train episode 294: reward = -113.31, steps = 134\n",
      "10:16:01 [DEBUG] train episode 295: reward = -282.00, steps = 113\n",
      "10:16:10 [DEBUG] train episode 296: reward = -286.56, steps = 206\n",
      "10:16:14 [DEBUG] train episode 297: reward = -117.19, steps = 98\n",
      "10:16:20 [DEBUG] train episode 298: reward = -188.22, steps = 129\n",
      "10:16:24 [DEBUG] train episode 299: reward = -178.26, steps = 106\n",
      "10:16:32 [DEBUG] train episode 300: reward = -116.22, steps = 167\n",
      "10:16:36 [DEBUG] train episode 301: reward = -125.04, steps = 103\n",
      "10:16:41 [DEBUG] train episode 302: reward = -100.87, steps = 107\n",
      "10:16:46 [DEBUG] train episode 303: reward = -219.16, steps = 108\n",
      "10:16:53 [DEBUG] train episode 304: reward = -172.78, steps = 164\n",
      "10:16:57 [DEBUG] train episode 305: reward = -224.83, steps = 105\n",
      "10:17:03 [DEBUG] train episode 306: reward = 37.84, steps = 120\n",
      "10:17:12 [DEBUG] train episode 307: reward = -197.55, steps = 225\n",
      "10:17:20 [DEBUG] train episode 308: reward = -244.53, steps = 167\n",
      "10:17:24 [DEBUG] train episode 309: reward = 1.84, steps = 98\n",
      "10:17:28 [DEBUG] train episode 310: reward = -38.25, steps = 92\n",
      "10:18:02 [DEBUG] train episode 311: reward = 99.52, steps = 764\n",
      "10:18:09 [DEBUG] train episode 312: reward = -317.85, steps = 157\n",
      "10:18:15 [DEBUG] train episode 313: reward = -43.25, steps = 151\n",
      "10:18:20 [DEBUG] train episode 314: reward = -31.94, steps = 105\n",
      "10:18:27 [DEBUG] train episode 315: reward = -318.80, steps = 161\n",
      "10:18:35 [DEBUG] train episode 316: reward = -293.41, steps = 178\n",
      "10:18:40 [DEBUG] train episode 317: reward = 19.58, steps = 114\n",
      "10:18:46 [DEBUG] train episode 318: reward = -267.28, steps = 152\n",
      "10:18:53 [DEBUG] train episode 319: reward = -305.29, steps = 161\n",
      "10:18:59 [DEBUG] train episode 320: reward = -165.88, steps = 137\n",
      "10:19:08 [DEBUG] train episode 321: reward = -142.77, steps = 203\n",
      "10:19:24 [DEBUG] train episode 322: reward = -253.69, steps = 359\n",
      "10:19:35 [DEBUG] train episode 323: reward = -215.88, steps = 256\n",
      "10:19:41 [DEBUG] train episode 324: reward = -53.77, steps = 139\n",
      "10:20:26 [DEBUG] train episode 325: reward = 43.81, steps = 1000\n",
      "10:20:31 [DEBUG] train episode 326: reward = 18.88, steps = 126\n",
      "10:20:47 [DEBUG] train episode 327: reward = -233.37, steps = 362\n",
      "10:21:03 [DEBUG] train episode 328: reward = -96.78, steps = 340\n",
      "10:21:15 [DEBUG] train episode 329: reward = -173.47, steps = 268\n",
      "10:21:34 [DEBUG] train episode 330: reward = -55.85, steps = 442\n",
      "10:21:46 [DEBUG] train episode 331: reward = -61.28, steps = 272\n",
      "10:22:05 [DEBUG] train episode 332: reward = -154.27, steps = 408\n",
      "10:22:22 [DEBUG] train episode 333: reward = -54.09, steps = 370\n",
      "10:22:31 [DEBUG] train episode 334: reward = -12.82, steps = 178\n",
      "10:22:38 [DEBUG] train episode 335: reward = -37.68, steps = 162\n",
      "10:23:24 [DEBUG] train episode 336: reward = 27.19, steps = 1000\n",
      "10:23:36 [DEBUG] train episode 337: reward = -49.52, steps = 266\n",
      "10:24:19 [DEBUG] train episode 338: reward = 91.25, steps = 937\n",
      "10:25:05 [DEBUG] train episode 339: reward = -29.73, steps = 1000\n",
      "10:25:50 [DEBUG] train episode 340: reward = -5.60, steps = 1000\n",
      "10:26:37 [DEBUG] train episode 341: reward = -36.03, steps = 1000\n",
      "10:27:29 [DEBUG] train episode 342: reward = -87.34, steps = 1000\n",
      "10:28:18 [DEBUG] train episode 343: reward = -40.44, steps = 1000\n",
      "10:28:26 [DEBUG] train episode 344: reward = -178.64, steps = 180\n",
      "10:29:11 [DEBUG] train episode 345: reward = 1.81, steps = 1000\n",
      "10:29:56 [DEBUG] train episode 346: reward = 8.41, steps = 1000\n",
      "10:30:42 [DEBUG] train episode 347: reward = -37.85, steps = 1000\n",
      "10:31:30 [DEBUG] train episode 348: reward = -59.51, steps = 1000\n",
      "10:32:17 [DEBUG] train episode 349: reward = -48.58, steps = 1000\n",
      "10:33:07 [DEBUG] train episode 350: reward = 19.01, steps = 1000\n",
      "10:34:03 [DEBUG] train episode 351: reward = 20.37, steps = 1000\n",
      "10:35:00 [DEBUG] train episode 352: reward = -36.50, steps = 1000\n",
      "10:35:54 [DEBUG] train episode 353: reward = 6.67, steps = 1000\n",
      "10:36:49 [DEBUG] train episode 354: reward = 15.42, steps = 1000\n",
      "10:37:46 [DEBUG] train episode 355: reward = -3.12, steps = 1000\n",
      "10:38:42 [DEBUG] train episode 356: reward = 12.33, steps = 1000\n",
      "10:39:41 [DEBUG] train episode 357: reward = -276.81, steps = 999\n",
      "10:40:36 [DEBUG] train episode 358: reward = -29.50, steps = 1000\n",
      "10:41:34 [DEBUG] train episode 359: reward = -6.15, steps = 1000\n",
      "10:42:29 [DEBUG] train episode 360: reward = 11.12, steps = 1000\n",
      "10:43:26 [DEBUG] train episode 361: reward = -46.64, steps = 1000\n",
      "10:44:21 [DEBUG] train episode 362: reward = 20.21, steps = 1000\n",
      "10:45:17 [DEBUG] train episode 363: reward = -21.80, steps = 1000\n",
      "10:46:07 [DEBUG] train episode 364: reward = 4.08, steps = 1000\n",
      "10:46:58 [DEBUG] train episode 365: reward = 25.01, steps = 1000\n",
      "10:47:51 [DEBUG] train episode 366: reward = -11.14, steps = 1000\n",
      "10:48:43 [DEBUG] train episode 367: reward = -5.90, steps = 1000\n",
      "10:49:34 [DEBUG] train episode 368: reward = 152.86, steps = 942\n",
      "10:50:27 [DEBUG] train episode 369: reward = 98.29, steps = 982\n",
      "10:51:17 [DEBUG] train episode 370: reward = 6.46, steps = 1000\n",
      "10:52:10 [DEBUG] train episode 371: reward = -4.99, steps = 1000\n",
      "10:53:02 [DEBUG] train episode 372: reward = 59.76, steps = 1000\n",
      "10:53:55 [DEBUG] train episode 373: reward = 26.03, steps = 1000\n",
      "10:54:49 [DEBUG] train episode 374: reward = -128.53, steps = 1000\n",
      "10:55:42 [DEBUG] train episode 375: reward = 38.01, steps = 1000\n",
      "10:56:35 [DEBUG] train episode 376: reward = 13.31, steps = 1000\n",
      "10:57:25 [DEBUG] train episode 377: reward = 41.84, steps = 1000\n",
      "10:58:05 [DEBUG] train episode 378: reward = 162.93, steps = 769\n",
      "10:58:50 [DEBUG] train episode 379: reward = 115.39, steps = 884\n",
      "10:59:29 [DEBUG] train episode 380: reward = 170.95, steps = 732\n",
      "11:00:13 [DEBUG] train episode 381: reward = 127.19, steps = 843\n",
      "11:00:31 [DEBUG] train episode 382: reward = 293.36, steps = 344\n",
      "11:01:09 [DEBUG] train episode 383: reward = 176.53, steps = 734\n",
      "11:01:47 [DEBUG] train episode 384: reward = 172.70, steps = 766\n",
      "11:02:17 [DEBUG] train episode 385: reward = 202.70, steps = 606\n",
      "11:02:45 [DEBUG] train episode 386: reward = 216.61, steps = 529\n",
      "11:03:13 [DEBUG] train episode 387: reward = 198.19, steps = 549\n",
      "11:04:02 [DEBUG] train episode 388: reward = 145.91, steps = 928\n",
      "11:04:34 [DEBUG] train episode 389: reward = 181.54, steps = 575\n",
      "11:05:01 [DEBUG] train episode 390: reward = 241.98, steps = 487\n",
      "11:05:25 [DEBUG] train episode 391: reward = 248.55, steps = 414\n",
      "11:05:25 [INFO] ==== test ====\n",
      "11:05:26 [DEBUG] test episode 0: reward = 277.28, steps = 418\n",
      "11:05:27 [DEBUG] test episode 1: reward = 233.27, steps = 401\n",
      "11:05:28 [DEBUG] test episode 2: reward = 228.74, steps = 514\n",
      "11:05:29 [DEBUG] test episode 3: reward = 225.76, steps = 503\n",
      "11:05:30 [DEBUG] test episode 4: reward = 249.42, steps = 523\n",
      "11:05:31 [DEBUG] test episode 5: reward = 221.33, steps = 464\n",
      "11:05:32 [DEBUG] test episode 6: reward = 213.58, steps = 434\n",
      "11:05:33 [DEBUG] test episode 7: reward = 259.34, steps = 451\n",
      "11:05:34 [DEBUG] test episode 8: reward = 215.36, steps = 584\n",
      "11:05:35 [DEBUG] test episode 9: reward = 234.53, steps = 401\n",
      "11:05:36 [DEBUG] test episode 10: reward = 218.23, steps = 669\n",
      "11:05:37 [DEBUG] test episode 11: reward = 263.28, steps = 464\n",
      "11:05:38 [DEBUG] test episode 12: reward = 206.59, steps = 458\n",
      "11:05:39 [DEBUG] test episode 13: reward = 263.12, steps = 371\n",
      "11:05:40 [DEBUG] test episode 14: reward = 234.53, steps = 449\n",
      "11:05:40 [DEBUG] test episode 15: reward = 254.30, steps = 375\n",
      "11:05:41 [DEBUG] test episode 16: reward = 224.86, steps = 513\n",
      "11:05:42 [DEBUG] test episode 17: reward = 214.34, steps = 489\n",
      "11:05:43 [DEBUG] test episode 18: reward = 234.87, steps = 418\n",
      "11:05:44 [DEBUG] test episode 19: reward = 208.02, steps = 429\n",
      "11:05:45 [DEBUG] test episode 20: reward = 273.05, steps = 384\n",
      "11:05:46 [DEBUG] test episode 21: reward = 209.81, steps = 502\n",
      "11:05:47 [DEBUG] test episode 22: reward = 221.46, steps = 690\n",
      "11:05:48 [DEBUG] test episode 23: reward = 211.70, steps = 624\n",
      "11:05:49 [DEBUG] test episode 24: reward = 221.82, steps = 379\n",
      "11:05:50 [DEBUG] test episode 25: reward = 211.89, steps = 598\n",
      "11:05:52 [DEBUG] test episode 26: reward = 214.95, steps = 489\n",
      "11:05:52 [DEBUG] test episode 27: reward = 255.48, steps = 374\n",
      "11:05:54 [DEBUG] test episode 28: reward = 222.00, steps = 506\n",
      "11:05:54 [DEBUG] test episode 29: reward = 234.99, steps = 420\n",
      "11:05:55 [DEBUG] test episode 30: reward = 270.02, steps = 358\n",
      "11:05:56 [DEBUG] test episode 31: reward = 274.00, steps = 369\n",
      "11:05:56 [DEBUG] test episode 32: reward = 197.17, steps = 421\n",
      "11:05:57 [DEBUG] test episode 33: reward = 265.64, steps = 493\n",
      "11:05:58 [DEBUG] test episode 34: reward = 242.44, steps = 423\n",
      "11:06:00 [DEBUG] test episode 35: reward = 218.41, steps = 621\n",
      "11:06:01 [DEBUG] test episode 36: reward = 193.90, steps = 437\n",
      "11:06:01 [DEBUG] test episode 37: reward = 242.15, steps = 433\n",
      "11:06:02 [DEBUG] test episode 38: reward = 253.12, steps = 418\n",
      "11:06:04 [DEBUG] test episode 39: reward = 219.34, steps = 561\n",
      "11:06:05 [DEBUG] test episode 40: reward = 248.94, steps = 462\n",
      "11:06:05 [DEBUG] test episode 41: reward = 254.59, steps = 400\n",
      "11:06:06 [DEBUG] test episode 42: reward = 204.38, steps = 464\n",
      "11:06:07 [DEBUG] test episode 43: reward = 227.20, steps = 411\n",
      "11:06:08 [DEBUG] test episode 44: reward = 246.14, steps = 426\n",
      "11:06:09 [DEBUG] test episode 45: reward = 240.90, steps = 406\n",
      "11:06:10 [DEBUG] test episode 46: reward = 221.85, steps = 638\n",
      "11:06:11 [DEBUG] test episode 47: reward = 223.06, steps = 426\n",
      "11:06:12 [DEBUG] test episode 48: reward = 266.92, steps = 477\n",
      "11:06:13 [DEBUG] test episode 49: reward = 241.36, steps = 537\n",
      "11:06:14 [DEBUG] test episode 50: reward = 213.25, steps = 392\n",
      "11:06:15 [DEBUG] test episode 51: reward = 238.72, steps = 432\n",
      "11:06:15 [DEBUG] test episode 52: reward = 270.38, steps = 400\n",
      "11:06:16 [DEBUG] test episode 53: reward = 231.75, steps = 385\n",
      "11:06:17 [DEBUG] test episode 54: reward = 247.63, steps = 424\n",
      "11:06:18 [DEBUG] test episode 55: reward = 231.26, steps = 412\n",
      "11:06:19 [DEBUG] test episode 56: reward = 202.18, steps = 403\n",
      "11:06:19 [DEBUG] test episode 57: reward = 212.22, steps = 447\n",
      "11:06:20 [DEBUG] test episode 58: reward = 261.40, steps = 449\n",
      "11:06:21 [DEBUG] test episode 59: reward = 224.69, steps = 450\n",
      "11:06:22 [DEBUG] test episode 60: reward = 264.63, steps = 427\n",
      "11:06:23 [DEBUG] test episode 61: reward = 231.54, steps = 480\n",
      "11:06:24 [DEBUG] test episode 62: reward = 237.32, steps = 425\n",
      "11:06:25 [DEBUG] test episode 63: reward = 229.52, steps = 435\n",
      "11:06:25 [DEBUG] test episode 64: reward = 257.12, steps = 421\n",
      "11:06:26 [DEBUG] test episode 65: reward = 228.47, steps = 444\n",
      "11:06:27 [DEBUG] test episode 66: reward = 194.83, steps = 432\n",
      "11:06:28 [DEBUG] test episode 67: reward = 210.60, steps = 504\n",
      "11:06:30 [DEBUG] test episode 68: reward = 216.76, steps = 672\n",
      "11:06:31 [DEBUG] test episode 69: reward = 258.72, steps = 449\n",
      "11:06:32 [DEBUG] test episode 70: reward = 232.74, steps = 385\n",
      "11:06:33 [DEBUG] test episode 71: reward = 258.31, steps = 448\n",
      "11:06:34 [DEBUG] test episode 72: reward = 250.08, steps = 492\n",
      "11:06:35 [DEBUG] test episode 73: reward = 207.09, steps = 453\n",
      "11:06:36 [DEBUG] test episode 74: reward = 239.58, steps = 494\n",
      "11:06:36 [DEBUG] test episode 75: reward = 283.37, steps = 398\n",
      "11:06:37 [DEBUG] test episode 76: reward = 273.23, steps = 406\n",
      "11:06:38 [DEBUG] test episode 77: reward = 199.14, steps = 437\n",
      "11:06:39 [DEBUG] test episode 78: reward = 206.96, steps = 453\n",
      "11:06:40 [DEBUG] test episode 79: reward = 232.77, steps = 527\n",
      "11:06:41 [DEBUG] test episode 80: reward = 262.82, steps = 414\n",
      "11:06:42 [DEBUG] test episode 81: reward = 238.25, steps = 417\n",
      "11:06:42 [DEBUG] test episode 82: reward = 213.53, steps = 486\n",
      "11:06:43 [DEBUG] test episode 83: reward = 276.50, steps = 429\n",
      "11:06:44 [DEBUG] test episode 84: reward = 221.97, steps = 602\n",
      "11:06:46 [DEBUG] test episode 85: reward = 210.29, steps = 722\n",
      "11:06:46 [DEBUG] test episode 86: reward = 248.47, steps = 399\n",
      "11:06:48 [DEBUG] test episode 87: reward = 218.88, steps = 714\n",
      "11:06:49 [DEBUG] test episode 88: reward = 197.18, steps = 432\n",
      "11:06:50 [DEBUG] test episode 89: reward = 229.81, steps = 405\n",
      "11:06:51 [DEBUG] test episode 90: reward = 228.87, steps = 470\n",
      "11:06:52 [DEBUG] test episode 91: reward = 243.35, steps = 437\n",
      "11:06:53 [DEBUG] test episode 92: reward = 227.23, steps = 441\n",
      "11:06:54 [DEBUG] test episode 93: reward = 252.32, steps = 409\n",
      "11:06:55 [DEBUG] test episode 94: reward = 251.28, steps = 409\n",
      "11:06:56 [DEBUG] test episode 95: reward = 210.83, steps = 484\n",
      "11:06:57 [DEBUG] test episode 96: reward = 222.87, steps = 490\n",
      "11:06:58 [DEBUG] test episode 97: reward = 209.81, steps = 603\n",
      "11:06:59 [DEBUG] test episode 98: reward = 277.47, steps = 399\n",
      "11:07:00 [DEBUG] test episode 99: reward = 217.30, steps = 448\n",
      "11:07:00 [INFO] average episode reward = 234.15 ± 22.26\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYEAAAD4CAYAAAAKA1qZAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8vihELAAAACXBIWXMAAAsTAAALEwEAmpwYAABOs0lEQVR4nO29d5wdZdn//7nntO0tm2zKJtkEkpBCKAkh0hQBCUXBB1RUBB9U1Ad/X1BRQUR4VL6ifNXHgigCgiIPAiJFpHcIEFII6WTTe3aT7bunzdy/P2buOffMmTllT92d6/165ZXdOe0+szPXdV+dcc5BEARBeBOl1AsgCIIgSgcpAYIgCA9DSoAgCMLDkBIgCILwMKQECIIgPIy/1AvIlObmZt7W1lbqZRAEQYwoVqxY0ck5H+v2+IhRAm1tbVi+fHmpl0EQBDGiYIztSPU4uYMIgiA8DCkBgiAID0NKgCAIwsOQEiAIgvAwpAQIgiA8DCkBgiAID0NKgCAIwsOQEiAIgighHX0RPLN2f8k+n5QAQRBECbn8nmX42v0rMBCJl+TzSQkQBEGUkF2HBwEAMVUryeeTEiAIgighisIAANE4KQGCIAjP4TOUQISUAEEQhPcwdAAicbU0n1+STyUIgiAAAIzpWiAcI0uAIAjCcyQsAVICBEEQnsPHREyA3EEEQRCegzH3wPBQVMXe7qGCfj4pAYIgiBKiGFI44hAT+NPrW/HRX7yCfT2FUwSkBAiCIEqImzuosz+CP7y6BafPGocJ9ZUF+3xSAkRBiKtayXycBDGSUIQSsFkC6/b2YjCq4vKT2gr7+QV9d8Kz/PzZTfjC3ctKvQyCKHsUxdkS6B2KAQDGVAcL+/kFfXfCs+zrCWN/T7jUy/AUq3d1ozccK/UyiCxxSxEVf8vaikBhP7+g7054Fk3jUDVe6mV4hpiq4YLb38SX711e6qUQWaK4ZAf1DuldResq/YX9/IK+O+FZNM7BOSmBYiEU7nu7u0u7EGLYRGIqYqqGd7YeAgD0hWPwKwyVAV9BP5eUAFEQVI1DJSVQNDTjXLMSr4PIHtFCOhLX8PNnNuIzd76NtXt60BuOobbCb9YRFIqclQBjbDJj7GXG2AbG2DrG2NXG8SbG2POMsc3G/43Sa65njLUzxjYxxs7OdQ1E+aFxDvIGFQ9hCSgFFhhE/okbf7tIXMPLmzoAAOv39aJ3KI66ysLGA4D8WAJxAN/mnM8GsBjAVYyxOQCuA/Ai53wGgBeN32E8dgmAuQCWAPg9Y6yw9g5RdDSuxwWI4iCUAOmAkUfMiAUMROLYZ1QHr93Tg75wDHUFDgoDeVACnPN9nPOVxs99ADYAmATgAgD3GU+7D8CFxs8XAHiQcx7hnG8D0A5gUa7rIMoLcgcVF7IERi5RVf/bbdjfi4Gonib6/u4e9IbjBQ8KA3mOCTDG2gAcB+AdAC2c832ArigAjDOeNgnALullu41jTu93JWNsOWNseUdHRz6XShQYjXOyBIqIULikA0YeIiaw5eAAAODoSfXY0tGP3qEYakMjwBIQMMZqAPwDwDWc895UT3U45igtOOd3cs4Xcs4Xjh07Nh/LJIoExQSKi+kOKvE6iOyJG0pgKKZbAUe31qMvHEfXYGzkWAKMsQB0BfA3zvmjxuEDjLEJxuMTABw0ju8GMFl6eSuAvflYB1E+qBo3M1aIwpOICZAaGGnE1MR9EvQrOHJsDQC9d9CIiAkw/aq7G8AGzvkvpYeeAHC58fPlAB6Xjl/CGAsxxqYBmAGA+guMMjQOKhYrIppRZ6SQDhhRcM4RVRNFYq0NlWiS2kQUuloYAPJha5wM4AsA1jDG3jOOfR/ArQAeYox9CcBOAJ8CAM75OsbYQwDWQ88suopzTp3GRhmaxkGGQPGIG1qALIGRRdy2UWqqDqK+KiH4m2sL2zcIyIMS4Jy/AXdX5Bkur7kFwC25fjZRvmicsoOKCRWLjTw457jh
n2sAAAumNmLFji60NlaiQaoNGF9XUfB1FD7qQHgSldxBRUV4FMgSGDl0D8bw0PLdAIBzj56Amz4+BxPqK9EfiZvPaSElQIxURHoo55wEUxFIuINKvBAiI659eDUeWbHb/D3gY5jf2gAA8EuBnWIoAeodRBQE4Z4ga6A4iMAw6YCRgawAACDgS4hiuVVEoWcJAKQEiAIhhD/pgOIg4i9UMVwevLG5E23XPYWugWhGz5eVgE+yBJQipHuREiAKgogJU61AcVDJHVRW3Pn6VgCZt/YO+Er3h6OYAFEQVHIHFRWV3EFlRZUxA2Aomln2u2wJAMCitia01Bc+HgCQEiAKhGa6g0gJFAOqGC4vqoK6EhiQMn1kKgM+s00EgKSamoe+9qGCrc0OuYOIgiCEv6aleSKRF6iVdHlRFTIsgZizJVAdsu6/+yOlmw1NSoAoCMIdRJZAcaAuouVFVVAX8gMRNyVgHaHSM0RKgBhlCAuAqoaLg0bzBMqKoOHjd9vhB6UYQEVAwdlzxxdlXU5QTIAoCBpZAkUlTq2kywrx93Db4ctN4zb++JyirMkNsgSIgkAxgeJCk8XKCzEopmfIOTAciZXPjUFKgCgIKrmDiopGMYGyIqEEnC2BSFzFidOa8Np3Ti/mshwhJUAUhIQlQEqgGMQpRbSsMJXAoHPFcDSuYX5rPaaMqSrmshwhJUAUBIoJFBdtlMcEHl25O6nfTjkTjaeOCUTiGoL+8hC/FBgmCgL1Diou8VEeE/jWQ6sBABcvaC3xSjJDWAK94eSYQFzVENc4Qn5f0mOloDxUETHqEAYAtY0oDhoVi2XNf/55GS6+Y2lB3lsoAae2ESIzKESWADGaUaV5AkThoQB89ry8qaNg720qgZiaNFNDZAaVixIoj1UQow6zgRwJp6Iw2t1BI42YmrjuI3FrOqhpCQTIHUSMYjh1ES0q5A4qL2JSMdigzSVElgDhCRLuoBIvxCNQA7nyQlYC9iZykbj+e7lkB5XHKohRh0aB4aJCFcPlRVRyB9mDw8I9RNlBxKhFLhCjOoHiYHYRLfE6CJ1YXHYHxfFme6fpIhWWALmDiFGLHAwmJVAcaKhMeRFTNdRV6MmXD7yzE5+/6x38Y+UeABQTIDyALPjV8umTNaoZzTGBQqcZxwtwkcZUDXWVAQDA9kMDAICN+3oBABHKDiJGO3LnULIEioM6ittGRAu8kyjE+8dUjroKXQmIOM1AVK8eFpZA0Fce4rc8VkGMKmTBTw3kioM456PxbEfj2QnpM3/5Kq7628qCvX9G76lqqDcsgd6w3j+o35gyZsYEAuUhfstjFcSowhoTKOFCPES8CL2a2q57Clf+ZXnhPkDimbX78buXNgPIXki3H+zHU2v2pXyOnLVmL+bKB7o7SI8JHOyNAAD6wjHc8tR6/P3dXQCA2oryaNhQHqsgRhXy7p8qhouDVqQ2Hc+tP1DQ9xd87f4VAIBvfHRGgdw1ifcsxICXWFwz3UEd/boSONgbwStGq4qKgIKxNaG8f+5wIEuAyDvybpRiAsWh0MV5pewBNVx3TdeAcy//SFzF3W9ss/yeL8R5iqncdAeJU7e/N2w+b3JjVdlkcpESIPKObGpTTKA4JNxBzud70/4+tF33FFbsODys9y+EyyRTslECsrLa2jng+Jw/vLIVtz27yfw93XdTNZ5x0ePFf3gLZ/ziFURVDVVBH3xKQtAflpTSpMbKjN6vGJASIPIOt6SIkhIoBokhPs6PL93SCQB4/L29w3r/gYjzrNxi4OQOCsdUzL/5WTyzdr/rc7d29Du+X6fhnhGksgTW7unB7B8+g0/+/k0AzhZROKai7bqn8MdXt2DFji5s6dCVT9CvoNIlDdSvlI/oLZ+VEKMGCgwXn3Stu4UwCseG5/roL6UScNip7+sJozccx61Pb7AcD0cTz3Wb6mWPU6WKCazc2YVoXMP7u3vwZnsnpl3/b7y44QA2H+gDoMcWROD31mc2Wl4b8CmoDDor
gbhWPgU0JVMCjLEljLFNjLF2xth1pVpHJvzq+Q9w0+NrS72MEYNKbSOKjprGHSSE0ZCLwOOcp1QQ5aYEBPZvKzdrczsXdkUZsVka7Qf78de3dwAA+qTJYD/+13oAwJfuW46zfvUaAODye5bhtNteNt7X+jkBn7sl8L0lRzkeLwUlUQKMMR+A2wGcA2AOgM8yxuaUYi2Z8OsXN+O+t3aUehkjBvlmyEYJ7O0eoiE0wyTdOE9RsOQm6J9YvRcn/t8XXR8fiGRvQYRjKtbs7sn6dXb29YSTjrmFVOX1x11Ohn0TLlsCmsZx5i9fxY2PrUU4plqU38b9fUnvtXTLIdd1B/wKakJ6AqYcA77p43Mwe0Kd6+uKTaksgUUA2jnnWznnUQAPArigRGsh8oxsCWQaE1i5swsn3foSHl4+coaJlxMqT20JiN20m5Df1jmAnqEYugedXSj9Eefjqbj+0TX4+O/ewMHeZCGeKU+9vw/X/P29jJ8vWwKq6nwuktxBUkxg+Y4u8+euwSj6HWYECwajqa2joI/hQ0eMAWDtE+Qvk0phQalWMwnALun33cYxC4yxKxljyxljyzs6CjcKjsgvsiDKdGO/cZ++y1q5syvNMwkn0qWIioCp+05fF2huwr5/GJbA6l3dAIC+HFxJ7+/pTvm4/fsOZWAJ2N1LP/33RjOdtFeKIxzqjya5wYQwn1BfgfcdrJw6qQAs4FNw1pwWAEBYsjaCvvJIDRWUSgk4nYWkvxjn/E7O+ULO+cKxY8cWYVlEPtCGkR0kAmVySh2ROeliAglLwNm/3m8qgdRKIiuMP2UuLj5RcJUp4TQxgbiqJQWM9/eGcdtzesqoHLDtGoxaYgIAcN8VizB9bDWqgj6s39ub9P6LpjWZP09rrsaitib858ltuPqMGebxQJlZAqWqGN4NYLL0eyuA4eWuEWWHHGfLtGI4bpju5XaDjBS0DN1B9ilXAiHs3Nwf4ng2Tc9EHCKXME+dS2sFt+sqXUxgzk3POgaaw8bgF3k28OGBKPojMTRWBdBluMmmNFVh7sR6rN3Tg3aHFNSWugrMb63H7PF1OG5KIwDgpo/PxcsbD5rPKbdrvFRK4F0AMxhj0wDsAXAJgM+VaC2e5b+fXIfz50/AgqlN6Z+cBVZ3UGYSQOxkyRKw0jMYQyigoCJN22GhRN0yD4Xf280d1J/WHaQ/7s/ClSH+lDmlCbtU1ZptMmwOhCEpRdTJCnXLNBLPlNtJCHfQpMZKUwk0VQfhY/p7tx9MVgI1FX48ftXJSdXAsuAPkDsI4JzHAXwDwLMANgB4iHO+rhRr8SrRuIY/v7kdF93xVt7f2xoYzuw1MUN6ZSNkvMAxP3oOn/3T22mfZ3YRTWcJRF2UQDgzd1DcJdjqBDP8QbnkxEdclJabJWCJCdjWGktxMe7tHkp6jQgMT6xPVPdWBHzwKQpUjWNrRz+On9JgeZ8TpjY5toOQBX+5WQIlWw3n/N+c85mc8yM457eUah1exU0Y5IPhpIia7qAyqqQsF1bt7E77nHQpoiIXvmco5qgoxE7/2odX47Sfv4z3d3c7Ph5VNXDOsWJHlxn4dUPIQvG3XbWzC799cXPa7yLjVC0cVzXXWFOqmMChfudeQgCwx1ACMUlhHRrQLYHmWmujN58CHBqIoLM/iuMNlw8AvPjtD+NMIxBsJ+CXLYHyusbLazVE0RiMFa74ZzjjJePkDsqJdL2DhCUQ1zgGHTYAcgB05+FBLNtm7TE0IL0mqmq46I6luOD2N1OuScQExA78k79fil88/0FWrUTs1by3v9yOI294OilgKxBKIOhTkiwQe7sIAJjcVInKgA/7e8JQNW4qrDHVQXQN6IFhe8tnn8LMAPus8bXm8SPG1rh+DzmWUm7WLikBjyIEQSEaGQ6nTkAV7qBhKoE/v7kN33gg80Eio410vYPkJmlO1b/2Y4dsHThlt0zMxSVktzDEtWXfzfe6tHNwwt7cTTR+E/UMSSmixnVdHfIl
XXsdDkrgyW+cghvOm424xrF6dzcO9uk1DePqKrC/N4xIXENtKFkJCGpCmYVV5d1/uUwUE5TXaoiiIW6WQuxJuMUSyOw1Ygc23EKat7ceSlm9OdpJ1ztIDojad9Gc8yQlcNjmOpEFuVNwdfWubsy68RkzC2YoqpoC2q40ugbd3TJ23Jq7ucUZwnEVfoWhIuBLigl09ulK4PIPTTWPBf0KxlQHAQD/8fuluP3lLQCA6c3VWGekgNaE/Hjl2o/gne+fAQDwSTunTC3XILmDiHJDBNCUPJkC2zsHcO+b29Abjg2rlbRwZ4gA2vLth9F23VNmwC4dXYOxkva3KQTZ5NdnWicA6EL4hn+uwXaj1XI4luxjt1sCsTRK4NqHVyMa1/Du9sPgnGP2D5/BemOwun2Qe5dLVbITwhL4r48c4fh97JfvUFQzgrcsKXjcaSi270h9ewI+BQ1VwaTPnTOxzvyMmooA2pqr0VJXAQBQJMGfqWuHAsMl5v63d2C/Q/8RL5POHdQzGMMX/7ws4/P217d34OYn1+P7j64Z1lAZISjEzurXRgDRqSrTie7BKKJxDe0H+3Hur19HR1+y6T9ctnT0Y8GPn8fursG8vWcmuFW8AsCjK3fjzte2mL+nCwzLgvuNzZ342zs78ciK3egNx9BnSwudOqYKhwcirq8fsLVL0DSOHYf0czMQiScJeXtWTnc2loDhe7cLTvGeThXDFQEf/Aozz4mqcXDO0dkfQUVAQXXQhy+fMg2A7n5sqEouSJsj9fZptfX+t1oCCv7x9ZPw3DdPS/k9gpQiWjoO9obxg8fW4sq/5j4bVdN4xjvTcmfIuJFjKsftL7cn7TofWbkbr2zqwB2vtGf0fkKptB/st1YMZ6gEYsYNKyyTrUZP9kzNbSF43mzvxPp9veYuNB/c//YOHBqImr3rt3UOpN2lx1UN5//2dUuRULakSsd8ePluS58l8VS3dUXiqil83mjXZwv87uV2zL/5uaQCsaPG11oGoABA1FZEJXhp4wF09EdMd1Fnf9T0qzu9FoBrfyK3dQNWdwrgnu8fiasI+RUoCjOV6Idvexlfv38lugdjaKoKgjGGG86bje23ngfGGBptlkDAxzBTCvgunNpoedwnCXG/wrBgaiNmttQiFdY6gfISu+W1mgIgovip0sMy5Y+vbcVJt77kOqxiJCFniNz27CZLp8buwajZS8WXYcqmaKbVPWh1B72w/kBGLQfi5s5Of61I2esLpxcYnHNzvdsP6cqjM4+WgHBJhPwKdncN4oxfvIIXN6QW7gf6Ili7pxfXPfr+sD83liK//kBv2GIpxOLOO2NBVNUwplpPdVwhNUkDkvvuT6ivTHIHyUL3kBRgveLe5abVBgBPrdmHC21ZQ+JvK/R5NjEB8bn23bNdsQhiKkfIr+iWgPGc3V1DeGbdfry7/bDZUlvO5bdbAn5FwcR63fVz+qyxSXGq4cQELCmi/vISu+W1mgIgTN18pJ8v26YHHsUutZxJNzfVnia4rydh4Rz7o+fxu5d1CyDTTYtoNXx4MGrZ/a/c2W0ODVc1jj++usVUGI+s2I1zfv06OvsjpkBTuVXIZOLn74/EzdcLt8S3H16N7z0yfAEsI1wSisKwp2sIGtfTKFMh3Gi1Wfa+kUllCRzoDVuHpRt/71QxgabqZN83ABw0FGZTdRCTGioxpjqIvnDccg3FjHGJQMK3LnjEsEjaxlQBSO5PFDNqCwTZWQIaFJa8GYm6KL1YXEPAp+gFXZxb4hE7Dw86DnmxV2P7fQyMMay68Sz88QsLk54vC/6MlYAcEyizNOhRrwSEqevLQwC0ykgHs/tEy40nVu/FrB88gx2H3JWVvVhsb7ez71/J8IIVgj0a15J2/q9v1t0P/3p/L3769Eb8zwub0TMUw7UPr8aGfb3Y2jFgyW7Z3ZVQSG754DKyUNkuzZV9o70Tf393p+W7HuwN48UNBzL6TgIhDPvDcXOHfHggCs45bnt2Iz44kNxn
XiiBTFMInXDLgOkLxzAQVS1KQlgrqWICshL41IJWsy+PyJ///eePx5vXfRRNNfrz7np9Gx5/b4/5evFd7Fa1cAUd3drg/NmqXpsg1pZtdpBfUWC/DN2qf2OqBr+PmTEBe3pqhT91+w0g4a5prA4muaGA4SmBILmDSofYSWYqzFJRLaYzFbDaNh/8/d2dAIAPDiTcVkvbO/HPVQkfcipLQIalSCJ9bNUe/PnNbUnv5+R6k9MQ+8Ix9EiCu3coluh9w7npCgKA3hTuoFU7u7B8+2GLUNkuKb493UP43j/W4GfS2L/L//wuvnTfcqzY0WW6kNIhlFpvOGYqgUMDERzojeD2l7fginvfTXqNOJ/2QqNscLMEDhjjDOOahqfe34fBaNzcGcuWwOpd3aawjMQ1yy74mMkN+PUlxwEAOvv07yR2q+ONLJjbnt2Eqx98DwOROGKqpAQGkl1tQZ+CKU3Ow9PjqmZR5t1Z1gn4FJaUxeamBKKqsAT0mIC92CxdDyYgfeBW3lBmWtfCGDPfl9xBRUYInrxYAkFhCZS3EhD9X77yl+W47J5lAIDP3fUOvvn31aZZbq8YdrME3BqOvb65A9f8/T3895P6yL3BaNzcFR1yKMp5YcNBrN2jZ/owxizWVG84Zu56VQ3YY1gCfoWlHOrx039vxE1PrLMEKp12whv3J4LEYjbsRXcsxRfuecf1vWWE+6N3KG5+t0P9UVNZObk3RIwllwpodyUQNtd11QMrccM/15qWgNABWzr6ccHtb+KWp/QZvNG4ZhlsMrGhAnWVAeN99O8khp8fPane8nlPrt6rWwIVzpbAuNoQWpsqXWf6xlTNEtvJVPkCuitOVwLW426B4bjKETSUgKppScVmmSiBdEPglWFYAkDCAii37KBSdREtGr3CHZQHS6DKtATK2x3UL91wr31gHcbT0R/BuNqKJGtmX88QwjEVj63aY30vF5/8yh3d5s+D0TgGIiomNVRi5+FBdDrc5F/5SyI7i8E6lakvHDcFnti1Vwd9aK4NpXQHdfZH0NkfSetjFkLrjc2dlmDq2j2ZZRCJdNOn1+43d34fHOjDSxt1t5JTVbRwBw2rD7+BW2DYnra7taM/KSYgBPKqXd1YseMwtnYO4Fip2dn4ukqEArpQEjt7kfM+rq4CE+orTEW2q2vQ4g6yt1/49SXHYWxtECG/DzsODZruP/N7qNy06GpCflOJuSHHDyJxVVcCtvvXqaeQ/lma/r1U/e9i38RUBNLve9NaAnKdQBbBRl0JqGXXH6u8VlMAxE4yH0VRQgmUmyUwFFXxoyfXoy8cg6ZZ3SmAVUiJCV5OMYFfvfABrnt0jeW4mxCTXQJ7uoYwFNOVAOBsCcgojFm6VfYOxcwUUfNzoypqK/yO2UGPrNiNQ4YC6A3HzWCwG1s6+rG9cwCX3m3d+bv1qjfXFY5h1+FBU+h19kew3xBg2w8NmtWlTkpAuIP6Iyouu2cZTv9/r6T8LCfcWm4csKVghmOaFBOwNuKLq5rZKdZuCTQIS8B0ByUeP0by78dU3bdebbqDrEq+tbESR46rxeSmKvz1SyfivR+eZV4L+us1czM2o6UG+3rCKVNs5a8dVTX4s3AHxQx3kIgJ2C0Bt8Hvn1rQav6crmrdn4Ml4KTQSs2otwREf/RsBp67ITIUBsusMvUvb23HPW9uQ2NVABcvbE3KzpBrGzbt78NpM8diMJZsCTj58p0U3l2vb8Wb7Ynd3q6uQQwYfdcB4Nl1qQOvcY1bzqFeZZx8U9eGAsntDAaiuPbh1ThyXI0pWNbs6QFj7umRGgceWLYz6bi9O6SdT//hLcfh4naiqob+SByD0Ti+dO9ytDVXm4q4PxLDhmHWLLgJugM2S6A/EgfnMFwgRr2FIcdkl5IsSOsrA6ZVJHrqyErgsg9NxfSx1fjr2zsQjWuIqokeOnZLIGTzcTdUBdFcE0x05lQ1DBpK/4ixNVi1sxt9kbjr1DBZ+WXrDoqqHAGf3up5
V1cYL9iSANzcQbd96hg0VQfxx9e2pg3cKsNIEQX0sZLl5goCPGAJCHeCU+fEbBGCKh/vlU9EoZSiMEfXyDppDJ4YhGG3BDr7o0k3GpBsCRzqj+AnT23Alo4BTGuuBgBs7xxEJK5hYoNzYNDeMGsgErcolydW78Wb7da+P3+6bKFhCVg/X7S7kAd6rNnTjfrKhEC58wsL8JcrFuGsOS346X8cjZBfwb1vbk9aV7qgXiYKQLDz0CDufn0b1uzpwZOr95rB24FhzOYVpAsMC0RDNrHL5Zybw2Vkl9KuriFTkOuBSgU1Ib8UE0icj5OObMZ3lxyFgE9BJK73ARLtFezXmFMGjSxIYyo3/27Tx+rXjFMl+msfdOC1DzqsSiCuWwL2Hv2iH5HdooipmhkT2HV4yGw4J3BKEbV/j+zcQVkoAb9Sdq4gwANKoD+vSgB5e6/hcv2ja3Dqz1+yHBOCuiLgc/Shy8PbhSk/6BDXsFeJyu8tWLMn0cZhVkstQn4Ff393FwDdvfKTC+clvcdUI39c8MTqvbj24dUA9Ja9dqE2q6UWZ81pQY2DEnAKVB/ojViqPj8yaxxOmzkWf7psIT67aAoWTWty9CELv/ktT63HGb94BUvbO5OeI3DLsRcs3dKJh5bvMouMgOTAdqZ9lARuKaL7bT71PunvD+iWj3itrEg2H+jDq989HUuv+6h5rL4yYJ5jpx2wX2Hm9T6uLmS6RGWclIDcUyemagklYGwc9jkogcvuWYbL7llm+d6RuAqfjyUldqSKCYgUUScqUmTmiO+fTqwPJ0VUvH+5ZQYBHlAC4gZxEnrZIoqgStmo7H+X7cSuw0OWDAshqMMx1fSh/+myhXjkax9C0K9gpVQh2j0YRV84ZhmSLXqjbHEogrN/17WSEmiuDWLx9DHYZGTcVAZ9uHTxVNgZLwlGO6Ipl4zwmdZVBMzvs+vwIG59eqNrtpBc9WkXSp85YbLl9x+cNxuXLp6C7sEYDvaF8cA7O7GlYwBPrN6L257diJ89s9Hiiqmr8JtW0o8vnIfnvnka/t+njsEb3zsdy39wJuZOrMMD7+xE12AM5xw9wXxdW3O1RViJa7E/EnfNupJxswQOugRWRdBT49x09cRVzYx9XH/ubDRVBy0Wm2xBOTVDC/gU05oJ+pSkPjriuNPrBDFVM2f4TmvWe+7vs8WtZAUp6z7dElCSij1jKbKDhO/diYoMLIF0unq4lkDAp5A7qBQIoTEUU7PeiQHWi1O4gzJpZVAohDkvt00Wu/veoZi5qztibDUWtjVhUkOl2YRtSlMVDg9Gcf/bO9EbjuOi4/VgmNipb+tMVgKpLIGmqiDu/c8TzN+rg84hpgkplIBTrEbIj4aqAPoMgfmr5z/AH17dgidX77U8V9RuNFUFcfflC/HYVScnvd95R0/AKUc24/MnTsEZR43DxQtaMb6uApG4hkW3vGi6ph58dxduf3kL7nhlC/ZJKbNHjqvBknnjAQCfXtiKmS21uHhBK1obq9BcE8KSueOx1Th3cp+ZtjHVlnWI2oh5Nz2L83/7hus5EcQtgpGb/x/sizjuyCtNS4CbSiym6QrhK6dOwyeOmZj0Gvl9nCyBgI+ZG6igX8EEY9TiWCme4hRIlYVjLJ7I0pk6pgqM6ZZANK5hMBpHXNXwyMpEDYtsCXCut5uwB4aFcrVfPVEzMGyPU+jKLlWxmFBm9rnFdobTNgLQ6wPKrVAM8IASEG0jOAc+/ce38Infpb/5ZOQWCGJTZ3dR7OsZwqV3veNYOZpvZhvdDd/ZllACIuWuNxw3FZRoV9DaWGneMDPG1WBrxwB+/3I7Tp81FpcZfdU/f2Ly7v3t68/AladNt/i0h6Iq3pDS/6IqB2PM3IE6CSYAWDRtjGvlrJMLStxkR4ytAed6mw6RzvjYe1YlcHSrntPeUBXEGbNbcOzkhqT3Y4zh/i+fiFs+eTTu/uIJaKgKWnbAAHDGUeMsv4uis1NnNOPzJ07FzR+fi5U3
noWQgxCRUy+nS9OlhP9b0DMUMwOaTkPK7cjWiPDtdw7oLTYmOcRfhL971g+eMf9uIkPGad2Avc+9iyUQTVgCIgg8takq6bkyfpslMBTTG9hVBHwI+RWEYyouumMp5vzwWfzr/X34rtTiw950UK8Yzjw7KOhLzsARmVCZxATS5ZBYWkln4ePXA8PlJ3LLb0V5pqk6ZLZLXr6jC+/v7sGuw4P4wWNrkvqcO6E6WgJWJfDFe97FG+2deKsIQ03Ezln2qYqfe8MxM2NGVKoKYRH0K5hhdDrsi8Tx7Y/NwjGTG7DhR0tw7tETknzeY2tDqA35EVU1vLJJb5b23Pr9GIiq+ORxkwAk3A/CvVDlYglcvKAVL137YcfH7OmGQOImm9GiC9TNB/vMXH0RxLzw2Ik4anwtjhqvK8VGh3bAqaizKYGvfvgI+BWGj8waCyDRZO2WC4/GRQta4fcprnGBeRMTxVWTpapZuyXQPRTFliyaD8ruoLjKcdXfVuKaB98DkNzeGLBmvmw+mEgFVjWelMEjkJWAk0Dz+xQzkyvoT7hZnHrwy8gKJaZxs8Wz/piCqKqZVqXsGpNbQAscK4bjLg3k4okUUZl6Y72p6gQydQfJ751NnJfcQSXiL1cswm0XH2M5ds3f38P9b+/Ee2kGZQNWd4XQGfaAnfCJF2JUox2xAxJCcSASNwOcfeE4esMxBH2KecMJJTCrpRbjJBP+KKNVrtgZiUwfgU9hpnD/jrFLe2njQTTXhPCzi+bj++ceha+edoTlM0Twz8kisLfrFXx64eSkY+KGn9ZcDZ/CsPlAP/bYKpr/6/Qj8cw1p5nCsDFN4NaOrATOntuCRdOasOkn5+DLp0wHAKzbqwuohur0ykX+bFkR2gV1z1B26aKyOygcU/HUmn2mG3Cyw05cVgK7Dus+d/E3CbkIv3R97oM+ZroEAz4FJx3ZDACYNd59ni5gVSixuIawTQnICk7OFKut8CfFQvy+5BTRiKslwBHwJ8cEGjNwB4VMSyC1FhiuJeDkpioHym9FBcAulBK51OmltmVKlsMcV812oxYa0UJXKAFrszU9JlBXmRBEIhd+fH2FZSdr9+OKfuhfWDwV2289DwDwyeMm4fRZY8G5fmO8s/UwFk9vQtCv4MrTjjAVyPeWHIUpTVWmP/zM2S1J6w74FHzzzJlJivInF87DK9d+xHJMuINCfh+mjqnCBwf6kuY4iJtZCEM3JeOGUDQnTmsyO0X6FGYK7vV7e8EYUONi3dg5e24LTpzWBAD40inT0FwTxLg6ax1C92AMm4y005a61DUKgHWzsWGf1dVotzIAoFIS9PZ252758WL363NIwwT068R0B/kVXHriFLz6nY9gga3HfvLrbNlBUdWMWQR8zOLOEUNmvnhSG6JxLSlO5LQ2p9bZnHMpJuDsDkoVGBaumnQ1RcONCXzuxCm4whhmU06M+mIxINkPKDR9Jn8+edMvLlz5IhmKyT7z9O6lXBFr6OyPGB039WrZSQ2VWLWzG6t2dlt29ZMbdSF5xlHjzOCYU6XsjHH6zk4uwlEUhiPG1mDZtsPYdXgI+3vDpqCTmTepHq9993Tz99s+NR9XnjYdtz27Cce0JlwlV585A4cHIrjvrR3mMZ/Ckna18mapbUw1PjjQh56hGOorA6bVI8z6GeNqwJizeyQVcyfWIehT8M2zZlqOT2yoBGPA3p6wnhWU4U0utxy+8fw5uPH8OUnN73qGYmYMxF7J6oS8I16+47D5c2XAl6RgAKugt2d6ubmDRKzAzU0hB4YDRovlqWOq01Zpy/nwMY0jHNMkJaBYsqb29YRRHfShOuRDJK4lTVTzKyxJ2DrFBMwRpQ5VucJ9lVFguEDZQWfPHZ/xc4uJJywBewqbuMYyaSUhB6niZoFK4nE5e2aoCJaA3BWyLxLHLqOv/WxpHJ58w58yoxn//j+n4jMnTDZ3y/UO/nNR7WvPQa8K+TEYU/HwCr0WQLgDUhHy+zBvUj3uu2IR
vvWxWZbHvn/ebDx99amWYz6F4cErF1t+F4ytCWG7IXBmtiRcEGJHN31sDV659iM4dUb6dck014TwwS3nYPH0MZbjQb+C5hpdwNrjBtlir4jtHoyaldqZ9BSSBZ2cmtvaWOkYYJRbIiRX9aa2BNyKmAI+xSzMymZYusUSiOuBYfE3s7uD9veEUVsRQMjvQ1zjSdXACnOoGLYpgZufWIdXNul9sgL+ZEtAxMgyCgyn/GbW67PcWkAMB08oAfsFIXbyGSkBeXqTlmwJyP5MJ3dQJK7iOw+vNoW1zO0vt5vZNn3hWGaCIa6ZSq2jL4LdXUOoCCgI+hPfxS7I50ysA2PMFGoLpybv5o+fopv3/3H8JMvxqqAPnAO/fakd58+fgCPGpvYFpyPk91kUlmDx9DEYY7ir5L+LnIo4QxrhJ+/opo6pdnRlDBexDre2BsMh6FPwRvshs1I7piYLOzvytdc7lLg2JjVWOu7cU3XIdLcE9ONuA9NlYS+/h3y9pXudyA4ShVp2d9DeniHUVvjN97cXY+oxARd3EPTmdPcu3W42KXSqExDnJrPAcBp30CiTmp5wB9n93+JvnInc0LKxBBwqiZe2H8LDK3bj8EAUd3/xBMtjoqT9hLZGvLu9C/WVAay+6WMp1xNVOSY0VGDHoUFTCbQ2VlkGq7t11ZzWXI0/f/EEfOiIMUmPja0NmbEAmWpp5+Tk6x8uD3zlREugGkjsqiyWgPScmeMSCqiQWRa6JdBnia3kyhWnTMMfXt1iGfQzGI0j6HePZchN9eSivdbGSscAYyolkC4m4NY0Td5AyYI9XYDTnh3E1cRQG7+SsC4A3RKYPaFOUgLWzZBPUZLuVXm8pEi3Fb2Tgg4Vw8JKcmsgB2SRIlqMDJAiMsp0mjN2gSEEeyZ/S3k3FneyBNK4g8SOJ9VnvbtdT0fsGYphzg+fSTnDOKZqmGgU7HT0RbDj8CAmN1bixxfOwxdPagOgF4W5cfpR4zLqqS6Qs13yKRRPOqIZR46zDucW963PwRLwKQxtUqwjnzt/O801+bcEzp+vVxLLu9x03WjlFObBaByTmyoxpakKC6Y2Ou7cUwk41xRRQ7A7Vf0CVsGfnTvImh1kCQz7FYslMGh0jA0ZjydZAiliAhpPKAExDCfgU5LcNG3NVagJ+VO2/0jEBFJrgXLM8MmF0fVtXLD/0bJpKGpxB0nTrwTyBeukBEQAMNMsgsGoisdtBVEyMVUzM366h2LY0tGPGS16vvzNn5iLp68+FY98/UMZfVYmVIcSgiWfQtEJscOSb2BhLYyvq8jZR58pY/IUEwCA/zy5DQAcm+ul60ZrT6OsCvjx2ndPxyePa3WOCQTdb2fXFFFDsLvpVMtsXF8W7iC5YljVEI4nlEDQ5g4CYMQE3CyBZHeQOUlN49hiKAFRAOiUHXT6rHFYeeNZKWc+hzKsExhlOsAbSsDNEnDpz+X4XCCx+5CViDDTa0N+x5iAPEpPvrhT7TacGnLJaxDZPZv29yIa18zMHkAPEI+rdW/TkC2yJWCvss034kZ3sgQmNVTmNK83G8YYlkA+XE43njcHG360xHLuxG40XQ8qOUtmMBJHQBK8TlkpqWMCqd1BbpejvKMPZuEOSqoYjmpmYNiv6IFheWNUV+E312jvvKp3EbW+v7gX4xo3M6HE/afXCVjXxxhLeV8Bw0sRHQ14QgnY/Z2JfP/0JoHFHeQQExCCvbk25KwEjJTAZ9cdwJwfPuv4vungnOPmJ9Zh1c4uxFRu7lBFT6CZLbWpXp4TFkug0ErA+DPJwkFk6kxsqDCHmhQakUUVdalKzQZFYagM+uBTmBlfEe6mdN1oZXfQQFS17MSdLAE3lw/gHhANphF8boHhdN0wZXdVOGYUi/kT7qCoah13KVsCD7yz0/JeioMlIBSkpnGzlbZo0OcUE8gEcd2lEwtuQfSRijcCw7YLQvyR7T1KnHCyBORjYkLWmOqgozuoyyFIu/lAn6Vhlh37rNZIXMO9
S7fj3qXbAejBWp/CTCVw5LjcMnZSYYkJFMkdJN/v1SE/pjVXY35rgyVIXUiEMHLrTzNcair8GIiqenrqgf602WD2aWuWnbiDIEoVsExnCbgrAWd3UDoriUlVOOG4iqGYarqrhDso6FdMRajHBPTH19uqqp1iAgKVc7N6WPTN8ivW7KBUytGy5gxl+2gLDHtCCdh3TeJyTxcAAhKtIgBp98GtZjqguxCcmoKJakiZz9z5tmPjNMGdr23FzkODeG1zB+67YhFm2gKoQb+CyoAP/ZE4mqqDBd0hy9XWmcxnzQXTHWS74V/6tt53yF5EVCiyCZxng95lNWJaN9lYAoB7YPaIsdXY0jGQRgmkjgm4nVr5c+Qce7dAskBeSn84DlXjZkxAuINkxtaEXBWV02QxgapxM11UTNST20ZcungKbjx/Tsq1CkS8QPSPciMf88rLCU8oAfuuSQjxTDZ6VndQIiNB0B+NI+hXUBMKJI11BIAuByXgpBjsPLNuPwDg0ZV78M0zZ1geCxi9gfojcYu7phDIlkAhM3IA5+wg+XOL1XzrjNnjcMkJk3HNmTPTPzkLhPAQSmAgzYwLu8vQ0udH8nn/86qTcbg/ilW7uuCGm4ANpY0JGMF6ZlUk6ebwyn8pobwrbNlB8kyAj84ehz1d1tYg5me5tLQA9HNkLxwLSL2GKvw+1+9up74ygKXXfdSSluzEaFMCOW3tGGO3McY2MsbeZ4z9kzHWID12PWOsnTG2iTF2tnR8AWNsjfHYb1ihJQuSqyGziQnIz5Fzk4UVMRhRURPyozKoWNxB7+/uxo2PrU3a8asaT5t9INNUnaxcAj7FNK3devjni0IrGRmn7KBSEPL7cOtF81MOwxkOQngIIbNie1dKazRm2y0HXNxBdRUBtDVXp7YE0sQE3NYhHq8O+i2COK07yOFhUwkoDDFNM91dtRV+NNeEXNfoUxTLxkC+PFSeXHQX9CnmPZatwJ7Y4FyNbV0PKQGZ5wHM45zPB/ABgOsBgDE2B8AlAOYCWALg94wxIU3uAHAlgBnGvyU5riEtSZaAJv7PLjCs2oZdAHqdQFXQh8qAzxIY/u4j7+Ovb+/A6t2Jcn/AfUC2HZEBdHgginDc6jYI+hTTtC50xkyqXiv5xik7aDQhK4ELjp2IR1ftwaoUnWzt3WotbZ8dhHCq/VR6d5DzvSCygEI2F1m6WblOCklsKAI+BbG4PvjmkhMmm+Mu3XbsfltgWM5M4lyvyreszRg0DxRmQzHars+clADn/DnOubBp3wbQavx8AYAHOecRzvk2AO0AFjHGJgCo45y/xfWtx18AXJjLGjLBLZqfyY7cqXeQ/lr954FoXLcEAj4MxVRzR+U09ANwn40KAI/+10lmf5zffe54zGqpRWd/FBG7JeBnphKoKrASKOauXHxWqs+867KFeNxhethIQCiBqqDPHOjjNi4T0C0B+VQEXNxBglR/KjcFkTYmYKSl2pWIojCc0NaI3372OPcPhTWOJFyLAT9D2BheP7Gh0vTFuykqny1F1P7V7ZX6shIYTpZQOkptqeabfEb6rgDwtPHzJAC7pMd2G8cmGT/bjzvCGLuSMbacMba8o6Nj2AtzcwdllB2kye6ghDAWhwejep/0CqPHjigOGyfNzpU7XKbKOKkJ+U3/6bTmaoypCTpaAiImoL+meDv1QmPGBFJclWfOacExDtPDRgJCCVQGfeZu1m2YPKBbnnJxUzpLwGn3fZw09cyJdO4gce845dg//LWT8HGHkZVAQunIu/sqqU5ABMXl72FXAqK2wl4sZt+J292lQX9iME0hMnkKoVhKSdptJGPsBQBOPVBv4Jw/bjznBgBxAH8TL3N4Pk9x3BHO+Z0A7gSAhQsXDjs1xK65VYcsHycGInH895Przd+dLIFITENFIOGeEcMzhsRcVp+CmS21Zt//VEqgKujD7Z87Hv9ctQeTGirRVB3Eur29yZaATzEzNQodEwCAa86cYQ6hKSSj3h1k
fK8Kv88Ufna/v0xc5agJ+c2U4aBLuqbASTb971cWJ7W0lklfLMaMz85uvyiWIgt2YQkE/YrpFpXf1+5yGlsbQs9QLClF1O6Tt1vXfkUx789C+O89lyLKOT8z1eOMscsBnA/gDJ7YTuwGII+MagWw1zje6nC8qKhSoYmdpe2d+PrfVuKN752Ou17fZhmsbh+ADeg50E3VQXNnPhRT0QC9uGf62Go8+Y1TsHTLIby0UR/RmComUBPyo7WxyuyyOaY6iEP9kaQiNDkmUIwCqnxnybgh7tfRZm4LxPfinJtCPNWmIGaMhQz5FUTimm0UZGYxgYqAL2XKa/o6AaPzZ5o2Eclr0f8PWdxBwhJwVmZ2S6C5Joj2g8kpok6C3a+wxDwBvxQTKMClRIFhCcbYEgDfA/AJzrncK/kJAJcwxkKMsWnQA8DLOOf7APQxxhYbWUGXAXg8lzUMh5hDqqfgZ89uQs9QDB8c6E+6MeRdGzcMGFEJKdoP7zXGIA5G42iq0nP4z5rTYvpOU1sCVoHeVB1Cbzie1F5AdgcVM3un0JhdREfZTksgplsxlhg4bs+Xl4mrGnxGxTFgFZhOgshp952OdP1yAjlbAonrU1itbllOdsU21mh/Yp8s5vTda6VBSRWSErC3j8gHo80dlOsZ+h2AWgDPM8beY4z9AQA45+sAPARgPYBnAFzFORfb2a8DuAt6sHgLEnGEoiF2DE6tG8JilJ5PSS5Vd4gJhA13kPBTrzayPQYiqiVoKy58+5B6QdCnJPldRQ8beai8/l4JH2mxWikUg3JJES0UP/2Po/HNM2fixGlNpiBJtSnoGYqhMuhLdPqUrg+nXb+4mrO5JoI+XUinswTS9d2xk4gJSJaAsWFxK3pjjOGrH55u/i4qxKOqltYSkFuaNFYFJSWQ1bIzYrRdnzlJEM75kSkeuwXALQ7HlwOYl8vn5kqiB5CDEjCCsFFVTR5k4TBvWMQAWuoqML6uAqt3dwPQLYEJUp656LroViks+snIiA6aOw9bRwUG/AmfZzFiAsXCdAeNUktgTE0IVxuFf6Yl4LIFj8Y1rNrZjc+cMBnPGYWD6fLXheCrCvpgu2RcybSBXLrPtmO6g/zp3EHWv/X158zGsa0NuPrB98wBRr1DcWtMwOH6kC0BRWFm0kchrqXRZql6ooGcHeHbd8oOEulmQ1EtaRch79q48aNQAgBw7OQGqyUgCWix4zrUn6wEvnH6kXjwyuT2z63GfOAtB613tF4MYyiBUWgJjLbJTU4kAsPOlsDq3d0YiqlYPH2MufNM5+YR75XNxiBtTCDDz7YjZlvPmZiYImc2kEvTCO+coyfgg1vOQYuxieodilmEudNOvDakWwJC0YivU5DAMFkCIx+x+XLahIkg7FBMTfpjy/eJaQnENTP41dpYidc366msg1FrSwex4znkYAmMqwthypjkQTCtTXpqabttyExAqojM1kwvZ0Z7dpBMIjDsYI3GVPz03xsQ8DEsnp5wHWVsCWQRJ0qnBMRGKdvr7PSjxuGhr34IE+orcP/beldQcT8FLAFu9/cVKaK94ZilTiBVTEC85v+cMQO94Rg+c8LkpOfmymiLCXhSCQg0jUPTOF5v78RpM5rBGEPYyN4R80rd4Mbro3HN3OGILpGqxpMsAXHhr9qZ3N/FrTtnXUUA9ZWBJBdSwMfMzKZRdj0CGH07LSfEpsDeJA4AVu3sxsqd3fjJhfPQUBU0hZ5dEP/84vmWNuLCzZmVJWC2knZ+XFgX2QaGAWDRtKakgfeAdeBMquE0omq+x2YJpIoJCCXQVB3ELz99bNZrzgTKDhpFaJzj78t34fJ7luGx9/YAyLytg8a5WRgm3EGisKdrMIqoqllaH4ub6Ln1B8wxeADww/PnYMk8pzIMHVFoFrSZ0GYe9CjcNY/G72QnUSzm4JKM6ckD8ybVA0gIHbsl8OmFk3GsVDgn3qsyi5bbAR/DnAl1+I1L5a+4H7KNCQic0lPTuYME04xx
ohceOyltTECkTBdj+txoi1l5WgmoGsd+I/Nmy8GBjFpLCzTOTdeRKI2vNfzzB3r195Szg+Rd3IXHJYqkz5s/IWUet/CtyvnWQb9i7niyueHLHZF26yVLwGnTMRTVjwnB5ktRtSsjYl3ZzF1gjOHfV5+KT7hU/oqNznDdjhUOr7Omhbq/b0NVEFv/77m44pRpad1B4jwWeuYFMPrcQZ5WApwnAklDMdVxKIwd8ffnPJFJlLAErEqgxhITSJzq8XWJVrXpLihhCVQEfBbf8A3nzcYN587GaTNS9z4fSRQymFduMMaMAqdkJSA2FwkloB8PpuncKdxB+ewntWBqIwDg7Lnu1moqnFpOy1ZtulnFZj+pNO6g46c2AAAuP2nqcJaZFaNtk+LpmIDKuakEBqOq4xQwO36fXvKucW5mEglLoMZQAjc9sQ6AtfhL3knVVyV2K+nG9E1uMiwBY5BMXyQOH2OorQjgK6dNT/nakYoX3EGAviN2KhYbslmYwhJI55IRCqUyj0Nx5k6sx7afnpvXWRKWYrEMi7l8aZRA25hqbL/1vNwX50E8bQloPNHbfygax45D6ZOrRVBL44nGVSIwLGICuw7rfYLkm1HOh64JSU3B0tzYsiXwP5cci6Mn1Y+qKmEnRttOy42AorhmBwEwB7OLSyedS+b0WeMAABcc6+zaGS75HvnhT9MDyQlLiqjDekZTllyx8bQloGkJv/5gVMX6vb1pX6ObtypicQ13v7ENQOJmtff2b6xOFIDJwr7axU3khGwJnDG7BWfMbkm7xpGKEIce0QHmhC07phLwC3eQfkLS7ZpntNSW7W5YTobIxh0kYNJXF27RoE8xm8eREhg+3lYC0m5+KKZi/b5etNSFcKA3Oa1NIHb0//vuTvzDGBYvbtY6qWrx5xfPx/FSG1/5IpWVRTr/t5hLUKi5t2WFh2ICAFxjAkMxFT6Fmdea2Plmk7hQTiz7/hnmRgmwxgkydQc5FYuNr6/AzsN6y7Jsi9mIBB5XAtwM7vYOxdDRF8GcCXU40Os+u8DcuUv3oz0mAAAnH9lsG8cnWwKZn/bqkB9N1UFPXeSjLQXPjYDPzR2koTLgM68f4T5xazHhxDnzxuOkI8bkZ6E5Is/WAKyu0XQxMYElJsCSlYCoyC8Wi6c34fz5+XW7lQpPKwFVcgcdHoyiayCGDx0xBtjkrgTEDdlQlXD1iF26HAMYW2MdVi1nAdVmmb0xb1K92UdoNCNSRD1jCfiYY7HYUEy1TOQSSjGTIUiCOy5dkPsCC8REaepeIMO/tVOKaJN0D2bb6jpXnNq8jFQ8rQS45A7qGoghElfTZlaISUtcMgWEEpB3/nYfpfxYtv1+7vzCAk/sjs0UUQ98VyCVJaBa3H+fXTQFr2/uxKyWwg/2KQYtdRX4yYXz8MA7Oy2ZcqlwShH1SRbFcCqaCR1PKwGVc0QMS0D07Hcbdi0wG3/FZSWQ3QVYlWWBlyfiARJeyQ7yK8w1MCz/zc89ekLZBnyHy6WLp+LSxZnn9Du1kpatawoMDx9Pnzk5JiAIpRHoIpAVVROvq7ApjnQb2Xyn3I0WhFr1yqYu4FMc/fwiJkAkcHIRTqhPuJVICQwfT1sCeoqodSeWLgArAlmyGS/v2t75/hlkmg4TXsAe8OWI3+dsCQxF07slvYa8cRKtXqYbvYUAcgflgqfPnJ4iarUE0rleRCBL9CpZNK3J4g5qqauw1AcQ2eOVwLAeE3BwB8XVtBapl9llZAS1NVfjN589DicfOYas6xzw9JUmZwcJQn4FL3zrw7jo+Fbz2BKpb4qICYjGWr/73HHDugDlqWOEFe8Ehq1tI7Z1DuCljQfIEkhDnxG/a2uuwieOmYi/fXlxiVc0svG0O4hzJ3eQD0eOq8GSeePxj5W70TamCl8+dRqesY34Ezu4TItd7Lx87UdcR/p5FXE6vLKr8ysK+rXEJuSTv38T3YMxtDZWei4ZIFPqKvzoNeZ029OwieHhaSWgGoHh
2gq/OQBexASEi0fjVqFkHxDuT9PZUWbGuBozR5pu8mS81EUUEJZAYhPSbTQw3N01hJOPaC7VssqWuy5biKMm1GJLxwDaD/Z7ZrNQaDyjBI6eVI81e3osxzQORGIaGqoCCSVgCH+RKqpxbsn28dssgUAWlsDz3/rwsNfvJbwS45NjAh19EYT8iulmHE1zIvLFmXP0vlmtjVX48MzR00K91HjkdgP++qVFuPnjcyzHRAO5hspEIFcIf2EJcG7NVhFZCCIw7JVdazFINJDzxjn1+xTEVY7dXYM44ZYXTAUApE9VJoh84ZkrraEqiEXTrL1UxHSwBqlqUbiDZEtAsVgCRnaQEdAbbVOGSgr3VtuIgMIQ0zTTCpXxiiIkSo9n3EFAsnBRNSAc18xRjUBiB5aICXDLDSkCwbG4BoV5p7q1mHglO8hpqMxXPzwduw4PmrMBCKLQeEwJWH+PqipUjdssAd0CEBWIemA48RrR/zymao6j84jhY7qDPKJYRUxAVgRzJ9bj+nNml3BVhNfwlBSzm9iDET09z2IJGMLfZ/ZwBxiY9LiuJKKqRq6gPOO97CC9gVxMmikwhgoNiSLjKSVgFy4DUd0Xaw0M66dEdDe85swZkBOAhIUQjZMSyDeiM6tX/OF+RU8RjUkB4YYMu2oSRL7wlDsoyRKIOlgCRv5+yO8zOzd+cKDPfNzMDiJ3UMHwiiXg9ymIadxsIvfNM2di7sT6Eq+K8BqekmJ24SKUgJyT7dRATrHEBMgSKDReOa1Bo4GcqBU4ZQYViBHFh5QArNW7zoI9uW95jGICeUfEBOQYzGjG71PAeaLmJJBF9TlB5AtPKQHZHVQV9GHQiAnIXUCdStEtloBZMczJHVQgPBISMDcUYqDRcPtQEUQueOqqky2BmpDf0RJwQnEYGxmLa1n1DSLS47WGemLMqOgZRJYAUQq8pQQkYV4Z9GHQ2IHZJ4PZkZWAiBlEyB2UdzymA1ArlMCQUAKeuh2JMiEvVx1j7FrGGGeMNUvHrmeMtTPGNjHGzpaOL2CMrTEe+w0rYitA2dr2MYbBmLAEUp8G5hAY1mMCdNPmEzFZzCvuoIQlEAWQXUdagsgXOUsxxthkAGcB2CkdmwPgEgBzASwB8HvGmNhu3wHgSgAzjH9Lcl1DpsjuIEVhpvshnTtIFkrCEuCcbtpC4ZXAcHVIv+4S7iDaVBDFJx9X3a8AfBdWa/4CAA9yziOc820A2gEsYoxNAFDHOX+L69u+vwC4MA9ryAjZrSN7ctJ1bHSKCQDUPI7IjdqQXp/SJSwBup6IEpBTsRhj7BMA9nDOV9u8OpMAvC39vts4FjN+th93e/8roVsNmDJlSi5LBWCzBKT1VgR8+Od/nWRp5StjbSUtp5PSzi2fmCmiHpGFwhLoMWIClG1GlIK0SoAx9gKA8Q4P3QDg+wA+5vQyh2M8xXFHOOd3ArgTABYuXJhz3NDHXJSA34fjpjS6vs6pWAwgdxCRGzVGTEBYAkFSAkQJSHvVcc7P5JzPs/8DsBXANACrGWPbAbQCWMkYGw99hz9ZeptWAHuN460Ox4uC3J1SWAUKS5+axxyyg+T3IPLDRQt0o7ClrqLEKykONRVGYHhAWAJ0PRHFZ9hbD875Gs75OM55G+e8DbqAP55zvh/AEwAuYYyFGGPToAeAl3HO9wHoY4wtNrKCLgPweO5fI3uE/K4I+NLOKnXKDgIokJdvvnLqdHzwk3PQ5JFOmpUBHxQG9JnFYqQEiOJTkAZynPN1jLGHAKwHEAdwFedcNR7+OoB7AVQCeNr4V3SEVZDJwHfZdSQLfrpp8wtjzJzX4AUYY6gO+dEXjsOvMBqcTpSEvCkBwxqQf78FwC0Oz1sOYF6+Pne4CMFe4dAwLvm5iZ9lFxCZ70Su1AglQNcSUSI86c8YUx20uIPSwVxSSyk7iMgVUTAWoGuJKBGeu/L+8fWT8PQ1p5qWQCgjJZD42TpvmHZvRG6IDKFABhYpQRQCTw2VAYAF
U/VUUNMdlKZQTH4uYFUIZMITuSKUAG0oiFLh2e2H8O1XZhQYTvwsu4Z8ZMITOSIKxijTjCgVnr3yWBYxAbd2E9T6l8iVGqN1BFmVRKnwrBLIxhJwiwlQsRiRKzWGJUDuIKJUeFYJCGFeXxVI80xrV0tmsQQ8e/qIPCGqhulaIkqFZ688sfFqzEAJKJQdRBQIM0WUlABRIjx75YnRkg2V6VsUWLKDpOOkBIhcMbODKCZAlAjPKoHesN60qyETd5CbJUC7NyJHaqhYjCgxnr3yeof0pl0NVektAebSgpoCw0SuVJMlQJQY7yoBwxLIJCYgw6QzRv3fiVxJuIPoWiJKg2evvN6hzN1BMlZ3EO3eiNxIuIPoWiJKg2eVgGbMKcvEHSSjUIookUcoO4goNZ6/8uors3QHSflB5A4icqW2gmICRGnxvBTLdgdGDeSIfEKWAFFqPNdFVHDbxfOxdMuhrF/nNmWMIIZDVYDaRhClxbNK4FMLJ+NTCydn/TqKCRD5RFEYakJ+yg4iSoZnlcBwkS0BL83DJQrHFadMw/FTGkq9DMKjkBLIgrPntlhjAlTlSeSBb501s9RLIDwMKYEM2XzLOfAxZqkeJncQQRAjHVICGeIk8MkdRBDESIe2sjlA7iCCIEY6JMVygNxBBEGMdEiK5QC5gwiCGOmQEsgBsgQIghjpkBTLASrwIQhipENSLAcC1DuIIIgRDimBHKAuogRBjHRIiuUAuYMIghjpkBTLAXIHEQQx0iElkAMBKhYjCGKEQ1IsBxTqAU8QxAgnZyXAGPv/GGObGGPrGGM/l45fzxhrNx47Wzq+gDG2xnjsN0zuyEYQBEEUlZwayDHGTgdwAYD5nPMIY2yccXwOgEsAzAUwEcALjLGZnHMVwB0ArgTwNoB/A1gC4Olc1kEQBEEMj1wtga8DuJVzHgEAzvlB4/gFAB7knEc459sAtANYxBibAKCOc/4W55wD+AuAC3NcA0EQBDFMclUCMwGcyhh7hzH2KmPsBOP4JAC7pOftNo5NMn62H3eEMXYlY2w5Y2x5R0dHjkslCIIg7KR1BzHGXgAw3uGhG4zXNwJYDOAEAA8xxqYDcPLz8xTHHeGc3wngTgBYuHCh6/MIgiCI4ZFWCXDOz3R7jDH2dQCPGq6dZYwxDUAz9B2+PMW9FcBe43irw3GCIAiiBOTqDnoMwEcBgDE2E0AQQCeAJwBcwhgLMcamAZgBYBnnfB+APsbYYiMr6DIAj+e4BoIgCGKY5Dpe8h4A9zDG1gKIArjcsArWMcYeArAeQBzAVUZmEKAHk+8FUAk9K4gygwiCIEpETkqAcx4FcKnLY7cAuMXh+HIA83L5XIIgCCI/UMUwQRCEhyElQBAE4WFICRAEQXgYUgIEQRAeJtfsIE/ym88eh8aqQKmXQRAEkTOkBIbBJ46ZWOolEARB5AVyBxEEQXgYUgIEQRAehpQAQRCEhyElQBAE4WFICRAEQXgYUgIEQRAehpQAQRCEhyElQBAE4WGY3v6//GGMdQDYMcyXN0MfdlOO0NqGB61teJTr2sp1XcDIX9tUzvlYtwdHjBLIBcbYcs75wlKvwwla2/CgtQ2Pcl1bua4LGP1rI3cQQRCEhyElQBAE4WG8ogTuLPUCUkBrGx60tuFRrmsr13UBo3xtnogJEARBEM54xRIgCIIgHCAlQBAE4WFGtRJgjC1hjG1ijLUzxq4rg/VsZ4ytYYy9xxhbbhxrYow9zxjbbPzfWKS13MMYO8gYWysdc10LY+x64zxuYoydXYK13cwY22Ocu/cYY+eWaG2TGWMvM8Y2MMbWMcauNo6X/NylWFvJzx1jrIIxtowxttpY238bx8vhvLmtreTnzfgsH2NsFWPsX8bv+T1nnPNR+Q+AD8AWANMBBAGsBjCnxGvaDqDZduznAK4zfr4OwM+KtJbTABwPYG26tQCYY5y/EIBpxnn1FXltNwO41uG5xV7bBADHGz/XAvjAWEPJz12KtZX83AFg
AGqMnwMA3gGwuEzOm9vaSn7ejM/7FoAHAPzL+D2v52w0WwKLALRzzrdyzqMAHgRwQYnX5MQFAO4zfr4PwIXF+FDO+WsADme4lgsAPMg5j3DOtwFoh35+i7k2N4q9tn2c85XGz30ANgCYhDI4dynW5kYx18Y55/3GrwHjH0d5nDe3tblRtLUxxloBnAfgLtvn5+2cjWYlMAnALun33Uh9QxQDDuA5xtgKxtiVxrEWzvk+QL+JAYwr2erc11Iu5/IbjLH3DXeRMIFLtjbGWBuA46DvHMvq3NnWBpTBuTPcGu8BOAjgec552Zw3l7UBpT9v/wPguwA06Vhez9loVgLM4Vip82FP5pwfD+AcAFcxxk4r8XoypRzO5R0AjgBwLIB9AH5hHC/J2hhjNQD+AeAaznlvqqc6HCvo+hzWVhbnjnOucs6PBdAKYBFjbF6Kp5fD2kp63hhj5wM4yDlfkelLHI6lXddoVgK7AUyWfm8FsLdEawEAcM73Gv8fBPBP6KbaAcbYBAAw/j9YuhW6rqXk55JzfsC4UTUAf0LCzC362hhjAehC9m+c80eNw2Vx7pzWVk7nzlhPN4BXACxBmZw3p7WVwXk7GcAnGGPbobuzP8oYux95PmejWQm8C2AGY2waYywI4BIAT5RqMYyxasZYrfgZwMcArDXWdLnxtMsBPF6aFQIp1vIEgEsYYyHG2DQAMwAsK+bCxEVv8Eno567oa2OMMQB3A9jAOf+l9FDJz53b2srh3DHGxjLGGoyfKwGcCWAjyuO8Oa6t1OeNc34957yVc94GXX69xDm/FPk+Z4WKaJfDPwDnQs+Q2ALghhKvZTr0yP1qAOvEegCMAfAigM3G/01FWs//QjdxY9B3EF9KtRYANxjncROAc0qwtr8CWAPgfeNin1CitZ0C3cR+H8B7xr9zy+HcpVhbyc8dgPkAVhlrWAvgh+mu/zJYW8nPm/R5H0EiOyiv54zaRhAEQXiY0ewOIgiCINJASoAgCMLDkBIgCILwMKQECIIgPAwpAYIgCA9DSoAgCMLDkBIgCILwMP8/6+CAron6yj4AAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "def play_episode(env, agent, max_episode_steps=None, mode=None, render=False):\n",
    "    \"\"\"Run a single episode of `agent` acting in `env`.\n",
    "\n",
    "    Args:\n",
    "        env: environment exposing `reset()`, `step(action)` and `render()`;\n",
    "            `step` must return a 4-tuple whose first three items are\n",
    "            (observation, reward, done).\n",
    "        agent: object exposing `reset(mode=...)`,\n",
    "            `step(observation, reward, done)` and `close()`.\n",
    "        max_episode_steps: optional cap on environment steps; a falsy value\n",
    "            (None or 0) means no cap.\n",
    "        mode: forwarded unchanged to `agent.reset`.\n",
    "        render: when true, `env.render()` is called after every agent step.\n",
    "\n",
    "    Returns:\n",
    "        Tuple `(total reward, number of environment steps taken)`.\n",
    "    \"\"\"\n",
    "    observation = env.reset()\n",
    "    agent.reset(mode=mode)\n",
    "    reward, done = 0., False\n",
    "    total_reward, step_count = 0., 0\n",
    "    out_of_steps = False\n",
    "    while not out_of_steps:\n",
    "        # The agent is also stepped on the terminal observation, so it sees\n",
    "        # the last reward and the done flag before the loop exits.\n",
    "        action = agent.step(observation, reward, done)\n",
    "        if render:\n",
    "            env.render()\n",
    "        if done:\n",
    "            break\n",
    "        observation, reward, done, _ = env.step(action)\n",
    "        total_reward += reward\n",
    "        step_count += 1\n",
    "        # Hitting the step cap ends the episode without another agent.step.\n",
    "        out_of_steps = (bool(max_episode_steps)\n",
    "                and step_count >= max_episode_steps)\n",
    "    agent.close()\n",
    "    return total_reward, step_count\n",
    "\n",
    "\n",
    "# Train until the mean return over the latest 10 episodes exceeds 200.\n",
    "logging.info('==== train ====')\n",
    "episode_rewards = []\n",
    "episode = 0\n",
    "while True:\n",
    "    # The unwrapped env is stepped directly, with the step cap handed to\n",
    "    # play_episode explicitly.\n",
    "    episode_reward, elapsed_steps = play_episode(\n",
    "            env.unwrapped, agent,\n",
    "            max_episode_steps=env._max_episode_steps, mode='train')\n",
    "    episode_rewards.append(episode_reward)\n",
    "    logging.debug('train episode %d: reward = %.2f, steps = %d',\n",
    "            episode, episode_reward, elapsed_steps)\n",
    "    if np.mean(episode_rewards[-10:]) > 200:\n",
    "        break\n",
    "    episode += 1\n",
    "# Learning curve: one point per training episode.\n",
    "plt.plot(episode_rewards)\n",
    "\n",
    "\n",
    "# Evaluate for 100 episodes on `env` itself, passing neither a mode nor an\n",
    "# explicit step cap. `episode_rewards` is reset here, so the training\n",
    "# returns are no longer available afterwards.\n",
    "logging.info('==== test ====')\n",
    "episode_rewards = []\n",
    "for episode in range(100):\n",
    "    episode_reward, elapsed_steps = play_episode(env, agent)\n",
    "    episode_rewards += [episode_reward]\n",
    "    logging.debug(\n",
    "            'test episode %d: reward = %.2f, steps = %d',\n",
    "            episode, episode_reward, elapsed_steps)\n",
    "# Summary over all test episodes: mean and standard deviation of the return.\n",
    "logging.info(\n",
    "        'average episode reward = %.2f ± %.2f',\n",
    "        np.mean(episode_rewards),\n",
    "        np.std(episode_rewards))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Training and evaluation are finished; close the environment.\n",
    "env.close()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.6"
  },
  "varInspector": {
   "cols": {
    "lenName": 16,
    "lenType": 16,
    "lenVar": 40
   },
   "kernels_config": {
    "python": {
     "delete_cmd_postfix": "",
     "delete_cmd_prefix": "del ",
     "library": "var_list.py",
     "varRefreshCmd": "print(var_dic_list())"
    },
    "r": {
     "delete_cmd_postfix": ") ",
     "delete_cmd_prefix": "rm(",
     "library": "var_list.r",
     "varRefreshCmd": "cat(var_dic_list()) "
    }
   },
   "types_to_exclude": [
    "module",
    "function",
    "builtin_function_or_method",
    "instance",
    "_Feature"
   ],
   "window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
